gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Types of uses.  */
 125 enum use_type
 126 {
 127   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 128   USE_ADDRESS,          /* Use in an address.  */
 129   USE_COMPARE           /* Use is a compare.  */
 130 };
 131
 132 /* The candidate - cost pair.  */
 133 struct cost_pair
 134 {
 135   struct iv_cand *cand; /* The candidate.  */
 136   unsigned cost;        /* The cost.  */
 137   bitmap depends_on;    /* The list of invariants that have to be
 138                            preserved.  */
 139   tree value;           /* For final value elimination, the expression for
 140                            the final value of the iv.  For iv elimination,
 141                            the new bound to compare with.  */
 142 };
 143
 144 /* Use.  */
 145 struct iv_use
 146 {
 147   unsigned id;          /* The id of the use.  */
 148   enum use_type type;   /* Type of the use.  */
 149   struct iv *iv;        /* The induction variable it is based on.  */
 150   tree stmt;            /* Statement in that it occurs.  */
 151   tree *op_p;           /* The place where it occurs.  */
 152   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 153                            important ones.  */
 154
 155   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 156   struct cost_pair *cost_map;
 157                         /* The costs wrto the iv candidates.  */
 158
 159   struct iv_cand *selected;
 160                         /* The selected candidate.  */
 161 };
 162
 163 /* The position where the iv is computed.  */
 164 enum iv_position
 165 {
 166   IP_NORMAL,            /* At the end, just before the exit condition.  */
 167   IP_END,               /* At the end of the latch block.  */
 168   IP_ORIGINAL           /* The original biv.  */
 169 };
 170
 171 /* The induction variable candidate.  */
 172 struct iv_cand
 173 {
 174   unsigned id;          /* The number of the candidate.  */
 175   bool important;       /* Whether this is an "important" candidate, i.e. such
 176                            that it should be considered by all uses.  */
 177   enum iv_position pos; /* Where it is computed.  */
 178   tree incremented_at;  /* For original biv, the statement where it is
 179                            incremented.  */
 180   tree var_before;      /* The variable used for it before increment.  */
 181   tree var_after;       /* The variable used for it after increment.  */
 182   struct iv *iv;        /* The value of the candidate.  NULL for
 183                            "pseudocandidate" used to indicate the possibility
 184                            to replace the final value of an iv by direct
 185                            computation of the value.  */
 186   unsigned cost;        /* Cost of the candidate.  */
 187   bitmap depends_on;    /* The list of invariants that are used in step of the
 188                            biv.  */
 189 };
 190
 191 /* The data used by the induction variable optimizations.  */
 192
 193 typedef struct iv_use *iv_use_p;
 194 DEF_VEC_P(iv_use_p);
 195 DEF_VEC_ALLOC_P(iv_use_p,heap);
 196
 197 typedef struct iv_cand *iv_cand_p;
 198 DEF_VEC_P(iv_cand_p);
 199 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 200
 201 struct ivopts_data
 202 {
 203   /* The currently optimized loop.  */
 204   struct loop *current_loop;
 205
 206   /* Number of registers used in it.  */
 207   unsigned regs_used;
 208
 209   /* Numbers of iterations for all exits of the current loop.  */
 210   htab_t niters;
 211
 212   /* The size of version_info array allocated.  */
 213   unsigned version_info_size;
 214
 215   /* The array of information for the ssa names.  */
 216   struct version_info *version_info;
 217
 218   /* The bitmap of indices in version_info whose value was changed.  */
 219   bitmap relevant;
 220
 221   /* The maximum invariant id.  */
 222   unsigned max_inv_id;
 223
 224   /* The uses of induction variables.  */
 225   VEC(iv_use_p,heap) *iv_uses;
 226
 227   /* The candidates.  */
 228   VEC(iv_cand_p,heap) *iv_candidates;
 229
 230   /* A bitmap of important candidates.  */
 231   bitmap important_candidates;
 232
 233   /* Whether to consider just related and important candidates when replacing a
 234      use.  */
 235   bool consider_all_candidates;
 236 };
 237
 238 /* An assignment of iv candidates to uses.  */
 239
 240 struct iv_ca
 241 {
 242   /* The number of uses covered by the assignment.  */
 243   unsigned upto;
 244
 245   /* Number of uses that cannot be expressed by the candidates in the set.  */
 246   unsigned bad_uses;
 247
 248   /* Candidate assigned to a use, together with the related costs.  */
 249   struct cost_pair **cand_for_use;
 250
 251   /* Number of times each candidate is used.  */
 252   unsigned *n_cand_uses;
 253
 254   /* The candidates used.  */
 255   bitmap cands;
 256
 257   /* The number of candidates in the set.  */
 258   unsigned n_cands;
 259
 260   /* Total number of registers needed.  */
 261   unsigned n_regs;
 262
 263   /* Total cost of expressing uses.  */
 264   unsigned cand_use_cost;
 265
 266   /* Total cost of candidates.  */
 267   unsigned cand_cost;
 268
 269   /* Number of times each invariant is used.  */
 270   unsigned *n_invariant_uses;
 271
 272   /* Total cost of the assignment.  */
 273   unsigned cost;
 274 };
 275
 276 /* Difference of two iv candidate assignments.  */
 277
 278 struct iv_ca_delta
 279 {
 280   /* Changed use.  */
 281   struct iv_use *use;
 282
 283   /* An old assignment (for rollback purposes).  */
 284   struct cost_pair *old_cp;
 285
 286   /* A new assignment.  */
 287   struct cost_pair *new_cp;
 288
 289   /* Next change in the list.  */
 290   struct iv_ca_delta *next_change;
 291 };
 292
 293 /* Bound on number of candidates below that all candidates are considered.  */
 294
 295 #define CONSIDER_ALL_CANDIDATES_BOUND \
 296   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 297
 298 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 299    optimizing such a loop would help, and it would take ages).  */
 300
 301 #define MAX_CONSIDERED_USES \
 302   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 303
 304 /* If there are at most this number of ivs in the set, try removing unnecessary
 305    ivs from the set always.  */
 306
 307 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 308   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 309
 310 /* The list of trees for that the decl_rtl field must be reset is stored
 311    here.  */
 312
 313 static VEC(tree,heap) *decl_rtl_to_reset;
 314
 315 /* Number of uses recorded in DATA.  */
 316
 317 static inline unsigned
 318 n_iv_uses (struct ivopts_data *data)
 319 {
 320   return VEC_length (iv_use_p, data->iv_uses);
 321 }
 322
 323 /* Ith use recorded in DATA.  */
 324
 325 static inline struct iv_use *
 326 iv_use (struct ivopts_data *data, unsigned i)
 327 {
 328   return VEC_index (iv_use_p, data->iv_uses, i);
 329 }
 330
 331 /* Number of candidates recorded in DATA.  */
 332
 333 static inline unsigned
 334 n_iv_cands (struct ivopts_data *data)
 335 {
 336   return VEC_length (iv_cand_p, data->iv_candidates);
 337 }
 338
 339 /* Ith candidate recorded in DATA.  */
 340
 341 static inline struct iv_cand *
 342 iv_cand (struct ivopts_data *data, unsigned i)
 343 {
 344   return VEC_index (iv_cand_p, data->iv_candidates, i);
 345 }
 346
 347 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 348
 349 edge
 350 single_dom_exit (struct loop *loop)
 351 {
 352   edge exit = loop->single_exit;
 353
 354   if (!exit)
 355     return NULL;
 356
 357   if (!just_once_each_iteration_p (loop, exit->src))
 358     return NULL;
 359
 360   return exit;
 361 }
 362
 363 /* Dumps information about the induction variable IV to FILE.  */
 364
 365 extern void dump_iv (FILE *, struct iv *);
 366 void
 367 dump_iv (FILE *file, struct iv *iv)
 368 {
 369   if (iv->ssa_name)
 370     {
 371       fprintf (file, "ssa name ");
 372       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 373       fprintf (file, "\n");
 374     }
 375
 376   fprintf (file, "  type ");
 377   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 378   fprintf (file, "\n");
 379
 380   if (iv->step)
 381     {
 382       fprintf (file, "  base ");
 383       print_generic_expr (file, iv->base, TDF_SLIM);
 384       fprintf (file, "\n");
 385
 386       fprintf (file, "  step ");
 387       print_generic_expr (file, iv->step, TDF_SLIM);
 388       fprintf (file, "\n");
 389     }
 390   else
 391     {
 392       fprintf (file, "  invariant ");
 393       print_generic_expr (file, iv->base, TDF_SLIM);
 394       fprintf (file, "\n");
 395     }
 396
 397   if (iv->base_object)
 398     {
 399       fprintf (file, "  base object ");
 400       print_generic_expr (file, iv->base_object, TDF_SLIM);
 401       fprintf (file, "\n");
 402     }
 403
 404   if (iv->biv_p)
 405     fprintf (file, "  is a biv\n");
 406 }
 407
 408 /* Dumps information about the USE to FILE.  */
 409
 410 extern void dump_use (FILE *, struct iv_use *);
 411 void
 412 dump_use (FILE *file, struct iv_use *use)
 413 {
 414   fprintf (file, "use %d\n", use->id);
 415
 416   switch (use->type)
 417     {
 418     case USE_NONLINEAR_EXPR:
 419       fprintf (file, "  generic\n");
 420       break;
 421
 422     case USE_ADDRESS:
 423       fprintf (file, "  address\n");
 424       break;
 425
 426     case USE_COMPARE:
 427       fprintf (file, "  compare\n");
 428       break;
 429
 430     default:
 431       gcc_unreachable ();
 432     }
 433
 434   fprintf (file, "  in statement ");
 435   print_generic_expr (file, use->stmt, TDF_SLIM);
 436   fprintf (file, "\n");
 437
 438   fprintf (file, "  at position ");
 439   if (use->op_p)
 440     print_generic_expr (file, *use->op_p, TDF_SLIM);
 441   fprintf (file, "\n");
 442
 443   dump_iv (file, use->iv);
 444
 445   if (use->related_cands)
 446     {
 447       fprintf (file, "  related candidates ");
 448       dump_bitmap (file, use->related_cands);
 449     }
 450 }
 451
 452 /* Dumps information about the uses to FILE.  */
 453
 454 extern void dump_uses (FILE *, struct ivopts_data *);
 455 void
 456 dump_uses (FILE *file, struct ivopts_data *data)
 457 {
 458   unsigned i;
 459   struct iv_use *use;
 460
 461   for (i = 0; i < n_iv_uses (data); i++)
 462     {
 463       use = iv_use (data, i);
 464
 465       dump_use (file, use);
 466       fprintf (file, "\n");
 467     }
 468 }
 469
 470 /* Dumps information about induction variable candidate CAND to FILE.  */
 471
 472 extern void dump_cand (FILE *, struct iv_cand *);
 473 void
 474 dump_cand (FILE *file, struct iv_cand *cand)
 475 {
 476   struct iv *iv = cand->iv;
 477
 478   fprintf (file, "candidate %d%s\n",
 479            cand->id, cand->important ? " (important)" : "");
 480
 481   if (cand->depends_on)
 482     {
 483       fprintf (file, "  depends on ");
 484       dump_bitmap (file, cand->depends_on);
 485     }
 486
 487   if (!iv)
 488     {
 489       fprintf (file, "  final value replacement\n");
 490       return;
 491     }
 492
 493   switch (cand->pos)
 494     {
 495     case IP_NORMAL:
 496       fprintf (file, "  incremented before exit test\n");
 497       break;
 498
 499     case IP_END:
 500       fprintf (file, "  incremented at end\n");
 501       break;
 502
 503     case IP_ORIGINAL:
 504       fprintf (file, "  original biv\n");
 505       break;
 506     }
 507
 508   dump_iv (file, iv);
 509 }
 510
 511 /* Returns the info for ssa version VER.  */
 512
 513 static inline struct version_info *
 514 ver_info (struct ivopts_data *data, unsigned ver)
 515 {
 516   return data->version_info + ver;
 517 }
 518
 519 /* Returns the info for ssa name NAME.  */
 520
 521 static inline struct version_info *
 522 name_info (struct ivopts_data *data, tree name)
 523 {
 524   return ver_info (data, SSA_NAME_VERSION (name));
 525 }
 526
 527 /* Checks whether there exists number X such that X * B = A, counting modulo
 528    2^BITS.  */
 529
 530 static bool
 531 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 532         HOST_WIDE_INT *x)
 533 {
 534   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 535   unsigned HOST_WIDE_INT inv, ex, val;
 536   unsigned i;
 537
 538   a &= mask;
 539   b &= mask;
 540
 541   /* First divide the whole equation by 2 as long as possible.  */
 542   while (!(a & 1) && !(b & 1))
 543     {
 544       a >>= 1;
 545       b >>= 1;
 546       bits--;
 547       mask >>= 1;
 548     }
 549
 550   if (!(b & 1))
 551     {
 552       /* If b is still even, a is odd and there is no such x.  */
 553       return false;
 554     }
 555
 556   /* Find the inverse of b.  We compute it as
 557      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 558   inv = 1;
 559   ex = b;
 560   for (i = 0; i < bits - 1; i++)
 561     {
 562       inv = (inv * ex) & mask;
 563       ex = (ex * ex) & mask;
 564     }
 565
 566   val = (a * inv) & mask;
 567
 568   gcc_assert (((val * b) & mask) == a);
 569
 570   if ((val >> (bits - 1)) & 1)
 571     val |= ~mask;
 572
 573   *x = val;
 574
 575   return true;
 576 }
 577
 578 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 579    emitted in LOOP.  */
 580
 581 static bool
 582 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 583 {
 584   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 585
 586   gcc_assert (bb);
 587
 588   if (sbb == loop->latch)
 589     return true;
 590
 591   if (sbb != bb)
 592     return false;
 593
 594   return stmt == last_stmt (bb);
 595 }
 596
 597 /* Returns true if STMT if after the place where the original induction
 598    variable CAND is incremented.  */
 599
 600 static bool
 601 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 602 {
 603   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 604   basic_block stmt_bb = bb_for_stmt (stmt);
 605   block_stmt_iterator bsi;
 606
 607   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 608     return false;
 609
 610   if (stmt_bb != cand_bb)
 611     return true;
 612
 613   /* Scan the block from the end, since the original ivs are usually
 614      incremented at the end of the loop body.  */
 615   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 616     {
 617       if (bsi_stmt (bsi) == cand->incremented_at)
 618         return false;
 619       if (bsi_stmt (bsi) == stmt)
 620         return true;
 621     }
 622 }
 623
 624 /* Returns true if STMT if after the place where the induction variable
 625    CAND is incremented in LOOP.  */
 626
 627 static bool
 628 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 629 {
 630   switch (cand->pos)
 631     {
 632     case IP_END:
 633       return false;
 634
 635     case IP_NORMAL:
 636       return stmt_after_ip_normal_pos (loop, stmt);
 637
 638     case IP_ORIGINAL:
 639       return stmt_after_ip_original_pos (cand, stmt);
 640
 641     default:
 642       gcc_unreachable ();
 643     }
 644 }
 645
 646 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 647
 648 static bool
 649 abnormal_ssa_name_p (tree exp)
 650 {
 651   if (!exp)
 652     return false;
 653
 654   if (TREE_CODE (exp) != SSA_NAME)
 655     return false;
 656
 657   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 658 }
 659
 660 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 661    abnormal phi node.  Callback for for_each_index.  */
 662
 663 static bool
 664 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 665                                   void *data ATTRIBUTE_UNUSED)
 666 {
 667   if (TREE_CODE (base) == ARRAY_REF)
 668     {
 669       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 670         return false;
 671       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 672         return false;
 673     }
 674
 675   return !abnormal_ssa_name_p (*index);
 676 }
 677
 678 /* Returns true if EXPR contains a ssa name that occurs in an
 679    abnormal phi node.  */
 680
 681 bool
 682 contains_abnormal_ssa_name_p (tree expr)
 683 {
 684   enum tree_code code;
 685   enum tree_code_class class;
 686
 687   if (!expr)
 688     return false;
 689
 690   code = TREE_CODE (expr);
 691   class = TREE_CODE_CLASS (code);
 692
 693   if (code == SSA_NAME)
 694     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 695
 696   if (code == INTEGER_CST
 697       || is_gimple_min_invariant (expr))
 698     return false;
 699
 700   if (code == ADDR_EXPR)
 701     return !for_each_index (&TREE_OPERAND (expr, 0),
 702                             idx_contains_abnormal_ssa_name_p,
 703                             NULL);
 704
 705   switch (class)
 706     {
 707     case tcc_binary:
 708     case tcc_comparison:
 709       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 710         return true;
 711
 712       /* Fallthru.  */
 713     case tcc_unary:
 714       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 715         return true;
 716
 717       break;
 718
 719     default:
 720       gcc_unreachable ();
 721     }
 722
 723   return false;
 724 }
 725
 726 /* Element of the table in that we cache the numbers of iterations obtained
 727    from exits of the loop.  */
 728
 729 struct nfe_cache_elt
 730 {
 731   /* The edge for that the number of iterations is cached.  */
 732   edge exit;
 733
 734   /* Number of iterations corresponding to this exit, or NULL if it cannot be
 735      determined.  */
 736   tree niter;
 737 };
 738
 739 /* Hash function for nfe_cache_elt E.  */
 740
 741 static hashval_t
 742 nfe_hash (const void *e)
 743 {
 744   const struct nfe_cache_elt *elt = e;
 745
 746   return htab_hash_pointer (elt->exit);
 747 }
 748
 749 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 750
 751 static int
 752 nfe_eq (const void *e1, const void *e2)
 753 {
 754   const struct nfe_cache_elt *elt1 = e1;
 755
 756   return elt1->exit == e2;
 757 }
 758
 759 /*  Returns tree describing number of iterations determined from
 760     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 761
 762 static tree
 763 niter_for_exit (struct ivopts_data *data, edge exit)
 764 {
 765   struct nfe_cache_elt *nfe_desc;
 766   struct tree_niter_desc desc;
 767   PTR *slot;
 768
 769   slot = htab_find_slot_with_hash (data->niters, exit,
 770                                    htab_hash_pointer (exit),
 771                                    INSERT);
 772
 773   if (!*slot)
 774     {
 775       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 776       nfe_desc->exit = exit;
 777
 778       /* Try to determine number of iterations.  We must know it
 779          unconditionally (i.e., without possibility of # of iterations
 780          being zero).  Also, we cannot safely work with ssa names that
 781          appear in phi nodes on abnormal edges, so that we do not create
 782          overlapping life ranges for them (PR 27283).  */
 783       if (number_of_iterations_exit (data->current_loop,
 784                                      exit, &desc, true)
 785           && zero_p (desc.may_be_zero)
 786           && !contains_abnormal_ssa_name_p (desc.niter))
 787         nfe_desc->niter = desc.niter;
 788       else
 789         nfe_desc->niter = NULL_TREE;
 790     }
 791   else
 792     nfe_desc = *slot;
 793
 794   return nfe_desc->niter;
 795 }
 796
 797 /* Returns tree describing number of iterations determined from
 798    single dominating exit of DATA->current_loop, or NULL if something
 799    goes wrong.  */
 800
 801 static tree
 802 niter_for_single_dom_exit (struct ivopts_data *data)
 803 {
 804   edge exit = single_dom_exit (data->current_loop);
 805
 806   if (!exit)
 807     return NULL;
 808
 809   return niter_for_exit (data, exit);
 810 }
 811
 812 /* Initializes data structures used by the iv optimization pass, stored
 813    in DATA.  */
 814
 815 static void
 816 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 817 {
 818   data->version_info_size = 2 * num_ssa_names;
 819   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 820   data->relevant = BITMAP_ALLOC (NULL);
 821   data->important_candidates = BITMAP_ALLOC (NULL);
 822   data->max_inv_id = 0;
 823   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 824   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 825   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 826   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 827 }
 828
 829 /* Returns a memory object to that EXPR points.  In case we are able to
 830    determine that it does not point to any such object, NULL is returned.  */
 831
 832 static tree
 833 determine_base_object (tree expr)
 834 {
 835   enum tree_code code = TREE_CODE (expr);
 836   tree base, obj, op0, op1;
 837
 838   /* If this is a pointer casted to any type, we need to determine
 839      the base object for the pointer; so handle conversions before
 840      throwing away non-pointer expressions.  */
 841   if (TREE_CODE (expr) == NOP_EXPR
 842       || TREE_CODE (expr) == CONVERT_EXPR)
 843     return determine_base_object (TREE_OPERAND (expr, 0));
 844
 845   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 846     return NULL_TREE;
 847
 848   switch (code)
 849     {
 850     case INTEGER_CST:
 851       return NULL_TREE;
 852
 853     case ADDR_EXPR:
 854       obj = TREE_OPERAND (expr, 0);
 855       base = get_base_address (obj);
 856
 857       if (!base)
 858         return expr;
 859
 860       if (TREE_CODE (base) == INDIRECT_REF)
 861         return determine_base_object (TREE_OPERAND (base, 0));
 862
 863       return fold_convert (ptr_type_node,
 864                            build_fold_addr_expr (base));
 865
 866     case PLUS_EXPR:
 867     case MINUS_EXPR:
 868       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 869       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 870
 871       if (!op1)
 872         return op0;
 873
 874       if (!op0)
 875         return (code == PLUS_EXPR
 876                 ? op1
 877                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 878
 879       return fold_build2 (code, ptr_type_node, op0, op1);
 880
 881     default:
 882       return fold_convert (ptr_type_node, expr);
 883     }
 884 }
 885
 886 /* Allocates an induction variable with given initial value BASE and step STEP
 887    for loop LOOP.  */
 888
 889 static struct iv *
 890 alloc_iv (tree base, tree step)
 891 {
 892   struct iv *iv = XCNEW (struct iv);
 893
 894   if (step && integer_zerop (step))
 895     step = NULL_TREE;
 896
 897   iv->base = base;
 898   iv->base_object = determine_base_object (base);
 899   iv->step = step;
 900   iv->biv_p = false;
 901   iv->have_use_for = false;
 902   iv->use_id = 0;
 903   iv->ssa_name = NULL_TREE;
 904
 905   return iv;
 906 }
 907
 908 /* Sets STEP and BASE for induction variable IV.  */
 909
 910 static void
 911 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 912 {
 913   struct version_info *info = name_info (data, iv);
 914
 915   gcc_assert (!info->iv);
 916
 917   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 918   info->iv = alloc_iv (base, step);
 919   info->iv->ssa_name = iv;
 920 }
 921
 922 /* Finds induction variable declaration for VAR.  */
 923
 924 static struct iv *
 925 get_iv (struct ivopts_data *data, tree var)
 926 {
 927   basic_block bb;
 928
 929   if (!name_info (data, var)->iv)
 930     {
 931       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 932
 933       if (!bb
 934           || !flow_bb_inside_loop_p (data->current_loop, bb))
 935         set_iv (data, var, var, NULL_TREE);
 936     }
 937
 938   return name_info (data, var)->iv;
 939 }
 940
 941 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 942    not define a simple affine biv with nonzero step.  */
 943
 944 static tree
 945 determine_biv_step (tree phi)
 946 {
 947   struct loop *loop = bb_for_stmt (phi)->loop_father;
 948   tree name = PHI_RESULT (phi);
 949   affine_iv iv;
 950
 951   if (!is_gimple_reg (name))
 952     return NULL_TREE;
 953
 954   if (!simple_iv (loop, phi, name, &iv, true))
 955     return NULL_TREE;
 956
 957   return (zero_p (iv.step) ? NULL_TREE : iv.step);
 958 }
 959
 960 /* Finds basic ivs.  */
 961
 962 static bool
 963 find_bivs (struct ivopts_data *data)
 964 {
 965   tree phi, step, type, base;
 966   bool found = false;
 967   struct loop *loop = data->current_loop;
 968
 969   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 970     {
 971       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 972         continue;
 973
 974       step = determine_biv_step (phi);
 975       if (!step)
 976         continue;
 977
 978       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 979       base = expand_simple_operations (base);
 980       if (contains_abnormal_ssa_name_p (base)
 981           || contains_abnormal_ssa_name_p (step))
 982         continue;
 983
 984       type = TREE_TYPE (PHI_RESULT (phi));
 985       base = fold_convert (type, base);
 986       if (step)
 987         step = fold_convert (type, step);
 988
 989       set_iv (data, PHI_RESULT (phi), base, step);
 990       found = true;
 991     }
 992
 993   return found;
 994 }
 995
 996 /* Marks basic ivs.  */
 997
 998 static void
 999 mark_bivs (struct ivopts_data *data)
1000 {
1001   tree phi, var;
1002   struct iv *iv, *incr_iv;
1003   struct loop *loop = data->current_loop;
1004   basic_block incr_bb;
1005
1006   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1007     {
1008       iv = get_iv (data, PHI_RESULT (phi));
1009       if (!iv)
1010         continue;
1011
1012       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1013       incr_iv = get_iv (data, var);
1014       if (!incr_iv)
1015         continue;
1016
1017       /* If the increment is in the subloop, ignore it.  */
1018       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1019       if (incr_bb->loop_father != data->current_loop
1020           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1021         continue;
1022
1023       iv->biv_p = true;
1024       incr_iv->biv_p = true;
1025     }
1026 }
1027
1028 /* Checks whether STMT defines a linear induction variable and stores its
1029    parameters to IV.  */
1030
1031 static bool
1032 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1033 {
1034   tree lhs;
1035   struct loop *loop = data->current_loop;
1036
1037   iv->base = NULL_TREE;
1038   iv->step = NULL_TREE;
1039
1040   if (TREE_CODE (stmt) != MODIFY_EXPR)
1041     return false;
1042
1043   lhs = TREE_OPERAND (stmt, 0);
1044   if (TREE_CODE (lhs) != SSA_NAME)
1045     return false;
1046
1047   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1048     return false;
1049   iv->base = expand_simple_operations (iv->base);
1050
1051   if (contains_abnormal_ssa_name_p (iv->base)
1052       || contains_abnormal_ssa_name_p (iv->step))
1053     return false;
1054
1055   return true;
1056 }
1057
1058 /* Finds general ivs in statement STMT.  */
1059
1060 static void
1061 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1062 {
1063   affine_iv iv;
1064
1065   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1066     return;
1067
1068   set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1069 }
1070
1071 /* Finds general ivs in basic block BB.  */
1072
1073 static void
1074 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1075 {
1076   block_stmt_iterator bsi;
1077
1078   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1079     find_givs_in_stmt (data, bsi_stmt (bsi));
1080 }
1081
1082 /* Finds general ivs.  */
1083
1084 static void
1085 find_givs (struct ivopts_data *data)
1086 {
1087   struct loop *loop = data->current_loop;
1088   basic_block *body = get_loop_body_in_dom_order (loop);
1089   unsigned i;
1090
1091   for (i = 0; i < loop->num_nodes; i++)
1092     find_givs_in_bb (data, body[i]);
1093   free (body);
1094 }
1095
1096 /* For each ssa name defined in LOOP determines whether it is an induction
1097    variable and if so, its initial value and step.  */
1098
1099 static bool
1100 find_induction_variables (struct ivopts_data *data)
1101 {
1102   unsigned i;
1103   bitmap_iterator bi;
1104
1105   if (!find_bivs (data))
1106     return false;
1107
1108   find_givs (data);
1109   mark_bivs (data);
1110
1111   if (dump_file && (dump_flags & TDF_DETAILS))
1112     {
1113       tree niter = niter_for_single_dom_exit (data);
1114
1115       if (niter)
1116         {
1117           fprintf (dump_file, "  number of iterations ");
1118           print_generic_expr (dump_file, niter, TDF_SLIM);
1119           fprintf (dump_file, "\n\n");
1120         };
1121
1122       fprintf (dump_file, "Induction variables:\n\n");
1123
1124       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1125         {
1126           if (ver_info (data, i)->iv)
1127             dump_iv (dump_file, ver_info (data, i)->iv);
1128         }
1129     }
1130
1131   return true;
1132 }
1133
1134 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1135
1136 static struct iv_use *
1137 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1138             tree stmt, enum use_type use_type)
1139 {
1140   struct iv_use *use = XCNEW (struct iv_use);
1141
1142   use->id = n_iv_uses (data);
1143   use->type = use_type;
1144   use->iv = iv;
1145   use->stmt = stmt;
1146   use->op_p = use_p;
1147   use->related_cands = BITMAP_ALLOC (NULL);
1148
1149   /* To avoid showing ssa name in the dumps, if it was not reset by the
1150      caller.  */
1151   iv->ssa_name = NULL_TREE;
1152
1153   if (dump_file && (dump_flags & TDF_DETAILS))
1154     dump_use (dump_file, use);
1155
1156   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1157
1158   return use;
1159 }
1160
1161 /* Checks whether OP is a loop-level invariant and if so, records it.
1162    NONLINEAR_USE is true if the invariant is used in a way we do not
1163    handle specially.  */
1164
1165 static void
1166 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1167 {
1168   basic_block bb;
1169   struct version_info *info;
1170
1171   if (TREE_CODE (op) != SSA_NAME
1172       || !is_gimple_reg (op))
1173     return;
1174
1175   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1176   if (bb
1177       && flow_bb_inside_loop_p (data->current_loop, bb))
1178     return;
1179
1180   info = name_info (data, op);
1181   info->name = op;
1182   info->has_nonlin_use |= nonlinear_use;
1183   if (!info->inv_id)
1184     info->inv_id = ++data->max_inv_id;
1185   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1186 }
1187
1188 /* Checks whether the use OP is interesting and if so, records it.  */
1189
1190 static struct iv_use *
1191 find_interesting_uses_op (struct ivopts_data *data, tree op)
1192 {
1193   struct iv *iv;
1194   struct iv *civ;
1195   tree stmt;
1196   struct iv_use *use;
1197
1198   if (TREE_CODE (op) != SSA_NAME)
1199     return NULL;
1200
1201   iv = get_iv (data, op);
1202   if (!iv)
1203     return NULL;
1204
1205   if (iv->have_use_for)
1206     {
1207       use = iv_use (data, iv->use_id);
1208
1209       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1210       return use;
1211     }
1212
1213   if (zero_p (iv->step))
1214     {
1215       record_invariant (data, op, true);
1216       return NULL;
1217     }
1218   iv->have_use_for = true;
1219
1220   civ = XNEW (struct iv);
1221   *civ = *iv;
1222
1223   stmt = SSA_NAME_DEF_STMT (op);
1224   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1225               || TREE_CODE (stmt) == MODIFY_EXPR);
1226
1227   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1228   iv->use_id = use->id;
1229
1230   return use;
1231 }
1232
1233 /* Checks whether the condition *COND_P in STMT is interesting
1234    and if so, records it.  */
1235
1236 static void
1237 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1238 {
1239   tree *op0_p;
1240   tree *op1_p;
1241   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1242   struct iv const_iv;
1243   tree zero = integer_zero_node;
1244
1245   const_iv.step = NULL_TREE;
1246
1247   if (TREE_CODE (*cond_p) != SSA_NAME
1248       && !COMPARISON_CLASS_P (*cond_p))
1249     return;
1250
1251   if (TREE_CODE (*cond_p) == SSA_NAME)
1252     {
1253       op0_p = cond_p;
1254       op1_p = &zero;
1255     }
1256   else
1257     {
1258       op0_p = &TREE_OPERAND (*cond_p, 0);
1259       op1_p = &TREE_OPERAND (*cond_p, 1);
1260     }
1261
1262   if (TREE_CODE (*op0_p) == SSA_NAME)
1263     iv0 = get_iv (data, *op0_p);
1264   else
1265     iv0 = &const_iv;
1266
1267   if (TREE_CODE (*op1_p) == SSA_NAME)
1268     iv1 = get_iv (data, *op1_p);
1269   else
1270     iv1 = &const_iv;
1271
1272   if (/* When comparing with non-invariant value, we may not do any senseful
1273          induction variable elimination.  */
1274       (!iv0 || !iv1)
1275       /* Eliminating condition based on two ivs would be nontrivial.
1276          ??? TODO -- it is not really important to handle this case.  */
1277       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1278     {
1279       find_interesting_uses_op (data, *op0_p);
1280       find_interesting_uses_op (data, *op1_p);
1281       return;
1282     }
1283
1284   if (zero_p (iv0->step) && zero_p (iv1->step))
1285     {
1286       /* If both are invariants, this is a work for unswitching.  */
1287       return;
1288     }
1289
1290   civ = XNEW (struct iv);
1291   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1292   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1293 }
1294
1295 /* Returns true if expression EXPR is obviously invariant in LOOP,
1296    i.e. if all its operands are defined outside of the LOOP.  */
1297
1298 bool
1299 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1300 {
1301   basic_block def_bb;
1302   unsigned i, len;
1303
1304   if (is_gimple_min_invariant (expr))
1305     return true;
1306
1307   if (TREE_CODE (expr) == SSA_NAME)
1308     {
1309       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1310       if (def_bb
1311           && flow_bb_inside_loop_p (loop, def_bb))
1312         return false;
1313
1314       return true;
1315     }
1316
1317   if (!EXPR_P (expr))
1318     return false;
1319
1320   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1321   for (i = 0; i < len; i++)
1322     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1323       return false;
1324
1325   return true;
1326 }
1327
1328 /* Cumulates the steps of indices into DATA and replaces their values with the
1329    initial ones.  Returns false when the value of the index cannot be determined.
1330    Callback for for_each_index.  */
1331
1332 struct ifs_ivopts_data
1333 {
1334   struct ivopts_data *ivopts_data;
1335   tree stmt;
1336   tree *step_p;
1337 };
1338
1339 static bool
1340 idx_find_step (tree base, tree *idx, void *data)
1341 {
1342   struct ifs_ivopts_data *dta = data;
1343   struct iv *iv;
1344   tree step, iv_base, iv_step, lbound, off;
1345   struct loop *loop = dta->ivopts_data->current_loop;
1346
1347   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1348       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1349     return false;
1350
1351   /* If base is a component ref, require that the offset of the reference
1352      be invariant.  */
1353   if (TREE_CODE (base) == COMPONENT_REF)
1354     {
1355       off = component_ref_field_offset (base);
1356       return expr_invariant_in_loop_p (loop, off);
1357     }
1358
1359   /* If base is array, first check whether we will be able to move the
1360      reference out of the loop (in order to take its address in strength
1361      reduction).  In order for this to work we need both lower bound
1362      and step to be loop invariants.  */
1363   if (TREE_CODE (base) == ARRAY_REF)
1364     {
1365       step = array_ref_element_size (base);
1366       lbound = array_ref_low_bound (base);
1367
1368       if (!expr_invariant_in_loop_p (loop, step)
1369           || !expr_invariant_in_loop_p (loop, lbound))
1370         return false;
1371     }
1372
1373   if (TREE_CODE (*idx) != SSA_NAME)
1374     return true;
1375
1376   iv = get_iv (dta->ivopts_data, *idx);
1377   if (!iv)
1378     return false;
1379
1380   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1381           *&x[0], which is not folded and does not trigger the
1382           ARRAY_REF path below.  */
1383   *idx = iv->base;
1384
1385   if (!iv->step)
1386     return true;
1387
1388   if (TREE_CODE (base) == ARRAY_REF)
1389     {
1390       step = array_ref_element_size (base);
1391
1392       /* We only handle addresses whose step is an integer constant.  */
1393       if (TREE_CODE (step) != INTEGER_CST)
1394         return false;
1395     }
1396   else
1397     /* The step for pointer arithmetics already is 1 byte.  */
1398     step = build_int_cst (sizetype, 1);
1399
1400   iv_base = iv->base;
1401   iv_step = iv->step;
1402   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1403                             sizetype, &iv_base, &iv_step, dta->stmt,
1404                             false))
1405     {
1406       /* The index might wrap.  */
1407       return false;
1408     }
1409
1410   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1411
1412   if (!*dta->step_p)
1413     *dta->step_p = step;
1414   else
1415     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1416
1417   return true;
1418 }
1419
1420 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1421    object is passed to it in DATA.  */
1422
1423 static bool
1424 idx_record_use (tree base, tree *idx,
1425                 void *data)
1426 {
1427   find_interesting_uses_op (data, *idx);
1428   if (TREE_CODE (base) == ARRAY_REF)
1429     {
1430       find_interesting_uses_op (data, array_ref_element_size (base));
1431       find_interesting_uses_op (data, array_ref_low_bound (base));
1432     }
1433   return true;
1434 }
1435
1436 /* Returns true if memory reference REF may be unaligned.  */
1437
1438 static bool
1439 may_be_unaligned_p (tree ref)
1440 {
1441   tree base;
1442   tree base_type;
1443   HOST_WIDE_INT bitsize;
1444   HOST_WIDE_INT bitpos;
1445   tree toffset;
1446   enum machine_mode mode;
1447   int unsignedp, volatilep;
1448   unsigned base_align;
1449
1450   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1451      thus they are not misaligned.  */
1452   if (TREE_CODE (ref) == TARGET_MEM_REF)
1453     return false;
1454
1455   /* The test below is basically copy of what expr.c:normal_inner_ref
1456      does to check whether the object must be loaded by parts when
1457      STRICT_ALIGNMENT is true.  */
1458   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1459                               &unsignedp, &volatilep, true);
1460   base_type = TREE_TYPE (base);
1461   base_align = TYPE_ALIGN (base_type);
1462
1463   if (mode != BLKmode
1464       && (base_align < GET_MODE_ALIGNMENT (mode)
1465           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1466           || bitpos % BITS_PER_UNIT != 0))
1467     return true;
1468
1469   return false;
1470 }
1471
1472 /* Return true if EXPR may be non-addressable.   */
1473
1474 static bool
1475 may_be_nonaddressable_p (tree expr)
1476 {
1477   switch (TREE_CODE (expr))
1478     {
1479     case COMPONENT_REF:
1480       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1481              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1482
1483     case ARRAY_REF:
1484     case ARRAY_RANGE_REF:
1485       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1486
1487     case VIEW_CONVERT_EXPR:
1488       /* This kind of view-conversions may wrap non-addressable objects
1489          and make them look addressable.  After some processing the
1490          non-addressability may be uncovered again, causing ADDR_EXPRs
1491          of inappropriate objects to be built.  */
1492       return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1493              && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1494
1495     default:
1496       break;
1497     }
1498
1499   return false;
1500 }
1501
1502 /* Finds addresses in *OP_P inside STMT.  */
1503
1504 static void
1505 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1506 {
1507   tree base = *op_p, step = NULL;
1508   struct iv *civ;
1509   struct ifs_ivopts_data ifs_ivopts_data;
1510
1511   /* Do not play with volatile memory references.  A bit too conservative,
1512      perhaps, but safe.  */
1513   if (stmt_ann (stmt)->has_volatile_ops)
1514     goto fail;
1515
1516   /* Ignore bitfields for now.  Not really something terribly complicated
1517      to handle.  TODO.  */
1518   if (TREE_CODE (base) == BIT_FIELD_REF)
1519     goto fail;
1520
1521   if (may_be_nonaddressable_p (base))
1522     goto fail;
1523
1524   if (STRICT_ALIGNMENT
1525       && may_be_unaligned_p (base))
1526     goto fail;
1527
1528   base = unshare_expr (base);
1529
1530   if (TREE_CODE (base) == TARGET_MEM_REF)
1531     {
1532       tree type = build_pointer_type (TREE_TYPE (base));
1533       tree astep;
1534
1535       if (TMR_BASE (base)
1536           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1537         {
1538           civ = get_iv (data, TMR_BASE (base));
1539           if (!civ)
1540             goto fail;
1541
1542           TMR_BASE (base) = civ->base;
1543           step = civ->step;
1544         }
1545       if (TMR_INDEX (base)
1546           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1547         {
1548           civ = get_iv (data, TMR_INDEX (base));
1549           if (!civ)
1550             goto fail;
1551
1552           TMR_INDEX (base) = civ->base;
1553           astep = civ->step;
1554
1555           if (astep)
1556             {
1557               if (TMR_STEP (base))
1558                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1559
1560               if (step)
1561                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1562               else
1563                 step = astep;
1564             }
1565         }
1566
1567       if (zero_p (step))
1568         goto fail;
1569       base = tree_mem_ref_addr (type, base);
1570     }
1571   else
1572     {
1573       ifs_ivopts_data.ivopts_data = data;
1574       ifs_ivopts_data.stmt = stmt;
1575       ifs_ivopts_data.step_p = &step;
1576       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1577           || zero_p (step))
1578         goto fail;
1579
1580       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1581       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1582
1583       base = build_fold_addr_expr (base);
1584
1585       /* Substituting bases of IVs into the base expression might
1586          have caused folding opportunities.  */
1587       if (TREE_CODE (base) == ADDR_EXPR)
1588         {
1589           tree *ref = &TREE_OPERAND (base, 0);
1590           while (handled_component_p (*ref))
1591             ref = &TREE_OPERAND (*ref, 0);
1592           if (TREE_CODE (*ref) == INDIRECT_REF)
1593             *ref = fold_indirect_ref (*ref);
1594         }
1595     }
1596
1597   civ = alloc_iv (base, step);
1598   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1599   return;
1600
1601 fail:
1602   for_each_index (op_p, idx_record_use, data);
1603 }
1604
1605 /* Finds and records invariants used in STMT.  */
1606
1607 static void
1608 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1609 {
1610   ssa_op_iter iter;
1611   use_operand_p use_p;
1612   tree op;
1613
1614   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1615     {
1616       op = USE_FROM_PTR (use_p);
1617       record_invariant (data, op, false);
1618     }
1619 }
1620
1621 /* Finds interesting uses of induction variables in the statement STMT.  */
1622
1623 static void
1624 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1625 {
1626   struct iv *iv;
1627   tree op, lhs, rhs;
1628   ssa_op_iter iter;
1629   use_operand_p use_p;
1630
1631   find_invariants_stmt (data, stmt);
1632
1633   if (TREE_CODE (stmt) == COND_EXPR)
1634     {
1635       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1636       return;
1637     }
1638
1639   if (TREE_CODE (stmt) == MODIFY_EXPR)
1640     {
1641       lhs = TREE_OPERAND (stmt, 0);
1642       rhs = TREE_OPERAND (stmt, 1);
1643
1644       if (TREE_CODE (lhs) == SSA_NAME)
1645         {
1646           /* If the statement defines an induction variable, the uses are not
1647              interesting by themselves.  */
1648
1649           iv = get_iv (data, lhs);
1650
1651           if (iv && !zero_p (iv->step))
1652             return;
1653         }
1654
1655       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1656         {
1657         case tcc_comparison:
1658           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1659           return;
1660
1661         case tcc_reference:
1662           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1663           if (REFERENCE_CLASS_P (lhs))
1664             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1665           return;
1666
1667         default: ;
1668         }
1669
1670       if (REFERENCE_CLASS_P (lhs)
1671           && is_gimple_val (rhs))
1672         {
1673           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1674           find_interesting_uses_op (data, rhs);
1675           return;
1676         }
1677
1678       /* TODO -- we should also handle address uses of type
1679
1680          memory = call (whatever);
1681
1682          and
1683
1684          call (memory).  */
1685     }
1686
1687   if (TREE_CODE (stmt) == PHI_NODE
1688       && bb_for_stmt (stmt) == data->current_loop->header)
1689     {
1690       lhs = PHI_RESULT (stmt);
1691       iv = get_iv (data, lhs);
1692
1693       if (iv && !zero_p (iv->step))
1694         return;
1695     }
1696
1697   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1698     {
1699       op = USE_FROM_PTR (use_p);
1700
1701       if (TREE_CODE (op) != SSA_NAME)
1702         continue;
1703
1704       iv = get_iv (data, op);
1705       if (!iv)
1706         continue;
1707
1708       find_interesting_uses_op (data, op);
1709     }
1710 }
1711
1712 /* Finds interesting uses of induction variables outside of loops
1713    on loop exit edge EXIT.  */
1714
1715 static void
1716 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1717 {
1718   tree phi, def;
1719
1720   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1721     {
1722       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1723       find_interesting_uses_op (data, def);
1724     }
1725 }
1726
1727 /* Finds uses of the induction variables that are interesting.  */
1728
1729 static void
1730 find_interesting_uses (struct ivopts_data *data)
1731 {
1732   basic_block bb;
1733   block_stmt_iterator bsi;
1734   tree phi;
1735   basic_block *body = get_loop_body (data->current_loop);
1736   unsigned i;
1737   struct version_info *info;
1738   edge e;
1739
1740   if (dump_file && (dump_flags & TDF_DETAILS))
1741     fprintf (dump_file, "Uses:\n\n");
1742
1743   for (i = 0; i < data->current_loop->num_nodes; i++)
1744     {
1745       edge_iterator ei;
1746       bb = body[i];
1747
1748       FOR_EACH_EDGE (e, ei, bb->succs)
1749         if (e->dest != EXIT_BLOCK_PTR
1750             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1751           find_interesting_uses_outside (data, e);
1752
1753       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1754         find_interesting_uses_stmt (data, phi);
1755       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1756         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1757     }
1758
1759   if (dump_file && (dump_flags & TDF_DETAILS))
1760     {
1761       bitmap_iterator bi;
1762
1763       fprintf (dump_file, "\n");
1764
1765       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1766         {
1767           info = ver_info (data, i);
1768           if (info->inv_id)
1769             {
1770               fprintf (dump_file, "  ");
1771               print_generic_expr (dump_file, info->name, TDF_SLIM);
1772               fprintf (dump_file, " is invariant (%d)%s\n",
1773                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1774             }
1775         }
1776
1777       fprintf (dump_file, "\n");
1778     }
1779
1780   free (body);
1781 }
1782
1783 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1784    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1785    we are at the top-level of the processed address.  */
1786
1787 static tree
1788 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1789                 unsigned HOST_WIDE_INT *offset)
1790 {
1791   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1792   enum tree_code code;
1793   tree type, orig_type = TREE_TYPE (expr);
1794   unsigned HOST_WIDE_INT off0, off1, st;
1795   tree orig_expr = expr;
1796
1797   STRIP_NOPS (expr);
1798
1799   type = TREE_TYPE (expr);
1800   code = TREE_CODE (expr);
1801   *offset = 0;
1802
1803   switch (code)
1804     {
1805     case INTEGER_CST:
1806       if (!cst_and_fits_in_hwi (expr)
1807           || zero_p (expr))
1808         return orig_expr;
1809
1810       *offset = int_cst_value (expr);
1811       return build_int_cst (orig_type, 0);
1812
1813     case PLUS_EXPR:
1814     case MINUS_EXPR:
1815       op0 = TREE_OPERAND (expr, 0);
1816       op1 = TREE_OPERAND (expr, 1);
1817
1818       op0 = strip_offset_1 (op0, false, false, &off0);
1819       op1 = strip_offset_1 (op1, false, false, &off1);
1820
1821       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1822       if (op0 == TREE_OPERAND (expr, 0)
1823           && op1 == TREE_OPERAND (expr, 1))
1824         return orig_expr;
1825
1826       if (zero_p (op1))
1827         expr = op0;
1828       else if (zero_p (op0))
1829         {
1830           if (code == PLUS_EXPR)
1831             expr = op1;
1832           else
1833             expr = fold_build1 (NEGATE_EXPR, type, op1);
1834         }
1835       else
1836         expr = fold_build2 (code, type, op0, op1);
1837
1838       return fold_convert (orig_type, expr);
1839
1840     case ARRAY_REF:
1841       if (!inside_addr)
1842         return orig_expr;
1843
1844       step = array_ref_element_size (expr);
1845       if (!cst_and_fits_in_hwi (step))
1846         break;
1847
1848       st = int_cst_value (step);
1849       op1 = TREE_OPERAND (expr, 1);
1850       op1 = strip_offset_1 (op1, false, false, &off1);
1851       *offset = off1 * st;
1852
1853       if (top_compref
1854           && zero_p (op1))
1855         {
1856           /* Strip the component reference completely.  */
1857           op0 = TREE_OPERAND (expr, 0);
1858           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1859           *offset += off0;
1860           return op0;
1861         }
1862       break;
1863
1864     case COMPONENT_REF:
1865       if (!inside_addr)
1866         return orig_expr;
1867
1868       tmp = component_ref_field_offset (expr);
1869       if (top_compref
1870           && cst_and_fits_in_hwi (tmp))
1871         {
1872           /* Strip the component reference completely.  */
1873           op0 = TREE_OPERAND (expr, 0);
1874           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1875           *offset = off0 + int_cst_value (tmp);
1876           return op0;
1877         }
1878       break;
1879
1880     case ADDR_EXPR:
1881       op0 = TREE_OPERAND (expr, 0);
1882       op0 = strip_offset_1 (op0, true, true, &off0);
1883       *offset += off0;
1884
1885       if (op0 == TREE_OPERAND (expr, 0))
1886         return orig_expr;
1887
1888       expr = build_fold_addr_expr (op0);
1889       return fold_convert (orig_type, expr);
1890
1891     case INDIRECT_REF:
1892       inside_addr = false;
1893       break;
1894
1895     default:
1896       return orig_expr;
1897     }
1898
1899   /* Default handling of expressions for that we want to recurse into
1900      the first operand.  */
1901   op0 = TREE_OPERAND (expr, 0);
1902   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1903   *offset += off0;
1904
1905   if (op0 == TREE_OPERAND (expr, 0)
1906       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1907     return orig_expr;
1908
1909   expr = copy_node (expr);
1910   TREE_OPERAND (expr, 0) = op0;
1911   if (op1)
1912     TREE_OPERAND (expr, 1) = op1;
1913
1914   /* Inside address, we might strip the top level component references,
1915      thus changing type of the expression.  Handling of ADDR_EXPR
1916      will fix that.  */
1917   expr = fold_convert (orig_type, expr);
1918
1919   return expr;
1920 }
1921
1922 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1923
1924 static tree
1925 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1926 {
1927   return strip_offset_1 (expr, false, false, offset);
1928 }
1929
1930 /* Returns variant of TYPE that can be used as base for different uses.
1931    We return unsigned type with the same precision, which avoids problems
1932    with overflows.  */
1933
1934 static tree
1935 generic_type_for (tree type)
1936 {
1937   if (POINTER_TYPE_P (type))
1938     return unsigned_type_for (type);
1939
1940   if (TYPE_UNSIGNED (type))
1941     return type;
1942
1943   return unsigned_type_for (type);
1944 }
1945
1946 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1947    the bitmap to that we should store it.  */
1948
1949 static struct ivopts_data *fd_ivopts_data;
1950 static tree
1951 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1952 {
1953   bitmap *depends_on = data;
1954   struct version_info *info;
1955
1956   if (TREE_CODE (*expr_p) != SSA_NAME)
1957     return NULL_TREE;
1958   info = name_info (fd_ivopts_data, *expr_p);
1959
1960   if (!info->inv_id || info->has_nonlin_use)
1961     return NULL_TREE;
1962
1963   if (!*depends_on)
1964     *depends_on = BITMAP_ALLOC (NULL);
1965   bitmap_set_bit (*depends_on, info->inv_id);
1966
1967   return NULL_TREE;
1968 }
1969
1970 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1971    position to POS.  If USE is not NULL, the candidate is set as related to
1972    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1973    replacement of the final value of the iv by a direct computation.  */
1974
1975 static struct iv_cand *
1976 add_candidate_1 (struct ivopts_data *data,
1977                  tree base, tree step, bool important, enum iv_position pos,
1978                  struct iv_use *use, tree incremented_at)
1979 {
1980   unsigned i;
1981   struct iv_cand *cand = NULL;
1982   tree type, orig_type;
1983
1984   if (base)
1985     {
1986       orig_type = TREE_TYPE (base);
1987       type = generic_type_for (orig_type);
1988       if (type != orig_type)
1989         {
1990           base = fold_convert (type, base);
1991           if (step)
1992             step = fold_convert (type, step);
1993         }
1994     }
1995
1996   for (i = 0; i < n_iv_cands (data); i++)
1997     {
1998       cand = iv_cand (data, i);
1999
2000       if (cand->pos != pos)
2001         continue;
2002
2003       if (cand->incremented_at != incremented_at)
2004         continue;
2005
2006       if (!cand->iv)
2007         {
2008           if (!base && !step)
2009             break;
2010
2011           continue;
2012         }
2013
2014       if (!base && !step)
2015         continue;
2016
2017       if (!operand_equal_p (base, cand->iv->base, 0))
2018         continue;
2019
2020       if (zero_p (cand->iv->step))
2021         {
2022           if (zero_p (step))
2023             break;
2024         }
2025       else
2026         {
2027           if (step && operand_equal_p (step, cand->iv->step, 0))
2028             break;
2029         }
2030     }
2031
2032   if (i == n_iv_cands (data))
2033     {
2034       cand = XCNEW (struct iv_cand);
2035       cand->id = i;
2036
2037       if (!base && !step)
2038         cand->iv = NULL;
2039       else
2040         cand->iv = alloc_iv (base, step);
2041
2042       cand->pos = pos;
2043       if (pos != IP_ORIGINAL && cand->iv)
2044         {
2045           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2046           cand->var_after = cand->var_before;
2047         }
2048       cand->important = important;
2049       cand->incremented_at = incremented_at;
2050       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2051
2052       if (step
2053           && TREE_CODE (step) != INTEGER_CST)
2054         {
2055           fd_ivopts_data = data;
2056           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2057         }
2058
2059       if (dump_file && (dump_flags & TDF_DETAILS))
2060         dump_cand (dump_file, cand);
2061     }
2062
2063   if (important && !cand->important)
2064     {
2065       cand->important = true;
2066       if (dump_file && (dump_flags & TDF_DETAILS))
2067         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2068     }
2069
2070   if (use)
2071     {
2072       bitmap_set_bit (use->related_cands, i);
2073       if (dump_file && (dump_flags & TDF_DETAILS))
2074         fprintf (dump_file, "Candidate %d is related to use %d\n",
2075                  cand->id, use->id);
2076     }
2077
2078   return cand;
2079 }
2080
2081 /* Returns true if incrementing the induction variable at the end of the LOOP
2082    is allowed.
2083
2084    The purpose is to avoid splitting latch edge with a biv increment, thus
2085    creating a jump, possibly confusing other optimization passes and leaving
2086    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2087    is not available (so we do not have a better alternative), or if the latch
2088    edge is already nonempty.  */
2089
2090 static bool
2091 allow_ip_end_pos_p (struct loop *loop)
2092 {
2093   if (!ip_normal_pos (loop))
2094     return true;
2095
2096   if (!empty_block_p (ip_end_pos (loop)))
2097     return true;
2098
2099   return false;
2100 }
2101
2102 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2103    position to POS.  If USE is not NULL, the candidate is set as related to
2104    it.  The candidate computation is scheduled on all available positions.  */
2105
2106 static void
2107 add_candidate (struct ivopts_data *data,
2108                tree base, tree step, bool important, struct iv_use *use)
2109 {
2110   if (ip_normal_pos (data->current_loop))
2111     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2112   if (ip_end_pos (data->current_loop)
2113       && allow_ip_end_pos_p (data->current_loop))
2114     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2115 }
2116
2117 /* Add a standard "0 + 1 * iteration" iv candidate for a
2118    type with SIZE bits.  */
2119
2120 static void
2121 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2122                                      unsigned int size)
2123 {
2124   tree type = lang_hooks.types.type_for_size (size, true);
2125   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2126                  true, NULL);
2127 }
2128
2129 /* Adds standard iv candidates.  */
2130
2131 static void
2132 add_standard_iv_candidates (struct ivopts_data *data)
2133 {
2134   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2135
2136   /* The same for a double-integer type if it is still fast enough.  */
2137   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2138     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2139 }
2140
2141
2142 /* Adds candidates bases on the old induction variable IV.  */
2143
2144 static void
2145 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2146 {
2147   tree phi, def;
2148   struct iv_cand *cand;
2149
2150   add_candidate (data, iv->base, iv->step, true, NULL);
2151
2152   /* The same, but with initial value zero.  */
2153   add_candidate (data,
2154                  build_int_cst (TREE_TYPE (iv->base), 0),
2155                  iv->step, true, NULL);
2156
2157   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2158   if (TREE_CODE (phi) == PHI_NODE)
2159     {
2160       /* Additionally record the possibility of leaving the original iv
2161          untouched.  */
2162       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2163       cand = add_candidate_1 (data,
2164                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2165                               SSA_NAME_DEF_STMT (def));
2166       cand->var_before = iv->ssa_name;
2167       cand->var_after = def;
2168     }
2169 }
2170
2171 /* Adds candidates based on the old induction variables.  */
2172
2173 static void
2174 add_old_ivs_candidates (struct ivopts_data *data)
2175 {
2176   unsigned i;
2177   struct iv *iv;
2178   bitmap_iterator bi;
2179
2180   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2181     {
2182       iv = ver_info (data, i)->iv;
2183       if (iv && iv->biv_p && !zero_p (iv->step))
2184         add_old_iv_candidates (data, iv);
2185     }
2186 }
2187
2188 /* Adds candidates based on the value of the induction variable IV and USE.  */
2189
2190 static void
2191 add_iv_value_candidates (struct ivopts_data *data,
2192                          struct iv *iv, struct iv_use *use)
2193 {
2194   unsigned HOST_WIDE_INT offset;
2195   tree base;
2196
2197   add_candidate (data, iv->base, iv->step, false, use);
2198
2199   /* The same, but with initial value zero.  Make such variable important,
2200      since it is generic enough so that possibly many uses may be based
2201      on it.  */
2202   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2203                  iv->step, true, use);
2204
2205   /* Third, try removing the constant offset.  */
2206   base = strip_offset (iv->base, &offset);
2207   if (offset)
2208     add_candidate (data, base, iv->step, false, use);
2209 }
2210
2211 /* Adds candidates based on the uses.  */
2212
2213 static void
2214 add_derived_ivs_candidates (struct ivopts_data *data)
2215 {
2216   unsigned i;
2217
2218   for (i = 0; i < n_iv_uses (data); i++)
2219     {
2220       struct iv_use *use = iv_use (data, i);
2221
2222       if (!use)
2223         continue;
2224
2225       switch (use->type)
2226         {
2227         case USE_NONLINEAR_EXPR:
2228         case USE_COMPARE:
2229         case USE_ADDRESS:
2230           /* Just add the ivs based on the value of the iv used here.  */
2231           add_iv_value_candidates (data, use->iv, use);
2232           break;
2233
2234         default:
2235           gcc_unreachable ();
2236         }
2237     }
2238 }
2239
2240 /* Record important candidates and add them to related_cands bitmaps
2241    if needed.  */
2242
2243 static void
2244 record_important_candidates (struct ivopts_data *data)
2245 {
2246   unsigned i;
2247   struct iv_use *use;
2248
2249   for (i = 0; i < n_iv_cands (data); i++)
2250     {
2251       struct iv_cand *cand = iv_cand (data, i);
2252
2253       if (cand->important)
2254         bitmap_set_bit (data->important_candidates, i);
2255     }
2256
2257   data->consider_all_candidates = (n_iv_cands (data)
2258                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2259
2260   if (data->consider_all_candidates)
2261     {
2262       /* We will not need "related_cands" bitmaps in this case,
2263          so release them to decrease peak memory consumption.  */
2264       for (i = 0; i < n_iv_uses (data); i++)
2265         {
2266           use = iv_use (data, i);
2267           BITMAP_FREE (use->related_cands);
2268         }
2269     }
2270   else
2271     {
2272       /* Add important candidates to the related_cands bitmaps.  */
2273       for (i = 0; i < n_iv_uses (data); i++)
2274         bitmap_ior_into (iv_use (data, i)->related_cands,
2275                          data->important_candidates);
2276     }
2277 }
2278
2279 /* Finds the candidates for the induction variables.  */
2280
2281 static void
2282 find_iv_candidates (struct ivopts_data *data)
2283 {
2284   /* Add commonly used ivs.  */
2285   add_standard_iv_candidates (data);
2286
2287   /* Add old induction variables.  */
2288   add_old_ivs_candidates (data);
2289
2290   /* Add induction variables derived from uses.  */
2291   add_derived_ivs_candidates (data);
2292
2293   /* Record the important candidates.  */
2294   record_important_candidates (data);
2295 }
2296
2297 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2298    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2299    we allocate a simple list to every use.  */
2300
2301 static void
2302 alloc_use_cost_map (struct ivopts_data *data)
2303 {
2304   unsigned i, size, s, j;
2305
2306   for (i = 0; i < n_iv_uses (data); i++)
2307     {
2308       struct iv_use *use = iv_use (data, i);
2309       bitmap_iterator bi;
2310
2311       if (data->consider_all_candidates)
2312         size = n_iv_cands (data);
2313       else
2314         {
2315           s = 0;
2316           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2317             {
2318               s++;
2319             }
2320
2321           /* Round up to the power of two, so that moduling by it is fast.  */
2322           for (size = 1; size < s; size <<= 1)
2323             continue;
2324         }
2325
2326       use->n_map_members = size;
2327       use->cost_map = XCNEWVEC (struct cost_pair, size);
2328     }
2329 }
2330
2331 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2332    on invariants DEPENDS_ON and that the value used in expressing it
2333    is VALUE.*/
2334
2335 static void
2336 set_use_iv_cost (struct ivopts_data *data,
2337                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2338                  bitmap depends_on, tree value)
2339 {
2340   unsigned i, s;
2341
2342   if (cost == INFTY)
2343     {
2344       BITMAP_FREE (depends_on);
2345       return;
2346     }
2347
2348   if (data->consider_all_candidates)
2349     {
2350       use->cost_map[cand->id].cand = cand;
2351       use->cost_map[cand->id].cost = cost;
2352       use->cost_map[cand->id].depends_on = depends_on;
2353       use->cost_map[cand->id].value = value;
2354       return;
2355     }
2356
2357   /* n_map_members is a power of two, so this computes modulo.  */
2358   s = cand->id & (use->n_map_members - 1);
2359   for (i = s; i < use->n_map_members; i++)
2360     if (!use->cost_map[i].cand)
2361       goto found;
2362   for (i = 0; i < s; i++)
2363     if (!use->cost_map[i].cand)
2364       goto found;
2365
2366   gcc_unreachable ();
2367
2368 found:
2369   use->cost_map[i].cand = cand;
2370   use->cost_map[i].cost = cost;
2371   use->cost_map[i].depends_on = depends_on;
2372   use->cost_map[i].value = value;
2373 }
2374
2375 /* Gets cost of (USE, CANDIDATE) pair.  */
2376
2377 static struct cost_pair *
2378 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2379                  struct iv_cand *cand)
2380 {
2381   unsigned i, s;
2382   struct cost_pair *ret;
2383
2384   if (!cand)
2385     return NULL;
2386
2387   if (data->consider_all_candidates)
2388     {
2389       ret = use->cost_map + cand->id;
2390       if (!ret->cand)
2391         return NULL;
2392
2393       return ret;
2394     }
2395
2396   /* n_map_members is a power of two, so this computes modulo.  */
2397   s = cand->id & (use->n_map_members - 1);
2398   for (i = s; i < use->n_map_members; i++)
2399     if (use->cost_map[i].cand == cand)
2400       return use->cost_map + i;
2401
2402   for (i = 0; i < s; i++)
2403     if (use->cost_map[i].cand == cand)
2404       return use->cost_map + i;
2405
2406   return NULL;
2407 }
2408
2409 /* Returns estimate on cost of computing SEQ.  */
2410
2411 static unsigned
2412 seq_cost (rtx seq)
2413 {
2414   unsigned cost = 0;
2415   rtx set;
2416
2417   for (; seq; seq = NEXT_INSN (seq))
2418     {
2419       set = single_set (seq);
2420       if (set)
2421         cost += rtx_cost (set, SET);
2422       else
2423         cost++;
2424     }
2425
2426   return cost;
2427 }
2428
2429 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2430 static rtx
2431 produce_memory_decl_rtl (tree obj, int *regno)
2432 {
2433   rtx x;
2434
2435   gcc_assert (obj);
2436   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2437     {
2438       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2439       x = gen_rtx_SYMBOL_REF (Pmode, name);
2440     }
2441   else
2442     x = gen_raw_REG (Pmode, (*regno)++);
2443
2444   return gen_rtx_MEM (DECL_MODE (obj), x);
2445 }
2446
2447 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2448    walk_tree.  DATA contains the actual fake register number.  */
2449
2450 static tree
2451 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2452 {
2453   tree obj = NULL_TREE;
2454   rtx x = NULL_RTX;
2455   int *regno = data;
2456
2457   switch (TREE_CODE (*expr_p))
2458     {
2459     case ADDR_EXPR:
2460       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2461            handled_component_p (*expr_p);
2462            expr_p = &TREE_OPERAND (*expr_p, 0))
2463         continue;
2464       obj = *expr_p;
2465       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2466         x = produce_memory_decl_rtl (obj, regno);
2467       break;
2468
2469     case SSA_NAME:
2470       *ws = 0;
2471       obj = SSA_NAME_VAR (*expr_p);
2472       if (!DECL_RTL_SET_P (obj))
2473         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2474       break;
2475
2476     case VAR_DECL:
2477     case PARM_DECL:
2478     case RESULT_DECL:
2479       *ws = 0;
2480       obj = *expr_p;
2481
2482       if (DECL_RTL_SET_P (obj))
2483         break;
2484
2485       if (DECL_MODE (obj) == BLKmode)
2486         x = produce_memory_decl_rtl (obj, regno);
2487       else
2488         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2489
2490       break;
2491
2492     default:
2493       break;
2494     }
2495
2496   if (x)
2497     {
2498       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2499       SET_DECL_RTL (obj, x);
2500     }
2501
2502   return NULL_TREE;
2503 }
2504
2505 /* Determines cost of the computation of EXPR.  */
2506
2507 static unsigned
2508 computation_cost (tree expr)
2509 {
2510   rtx seq, rslt;
2511   tree type = TREE_TYPE (expr);
2512   unsigned cost;
2513   /* Avoid using hard regs in ways which may be unsupported.  */
2514   int regno = LAST_VIRTUAL_REGISTER + 1;
2515
2516   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2517   start_sequence ();
2518   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2519   seq = get_insns ();
2520   end_sequence ();
2521
2522   cost = seq_cost (seq);
2523   if (MEM_P (rslt))
2524     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2525
2526   return cost;
2527 }
2528
2529 /* Returns variable containing the value of candidate CAND at statement AT.  */
2530
2531 static tree
2532 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2533 {
2534   if (stmt_after_increment (loop, cand, stmt))
2535     return cand->var_after;
2536   else
2537     return cand->var_before;
2538 }
2539
2540 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2541    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2542
2543 int
2544 tree_int_cst_sign_bit (tree t)
2545 {
2546   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2547   unsigned HOST_WIDE_INT w;
2548
2549   if (bitno < HOST_BITS_PER_WIDE_INT)
2550     w = TREE_INT_CST_LOW (t);
2551   else
2552     {
2553       w = TREE_INT_CST_HIGH (t);
2554       bitno -= HOST_BITS_PER_WIDE_INT;
2555     }
2556
2557   return (w >> bitno) & 1;
2558 }
2559
2560 /* If we can prove that TOP = cst * BOT for some constant cst,
2561    store cst to MUL and return true.  Otherwise return false.
2562    The returned value is always sign-extended, regardless of the
2563    signedness of TOP and BOT.  */
2564
2565 static bool
2566 constant_multiple_of (tree top, tree bot, double_int *mul)
2567 {
2568   tree mby;
2569   enum tree_code code;
2570   double_int res, p0, p1;
2571   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2572
2573   STRIP_NOPS (top);
2574   STRIP_NOPS (bot);
2575
2576   if (operand_equal_p (top, bot, 0))
2577     {
2578       *mul = double_int_one;
2579       return true;
2580     }
2581
2582   code = TREE_CODE (top);
2583   switch (code)
2584     {
2585     case MULT_EXPR:
2586       mby = TREE_OPERAND (top, 1);
2587       if (TREE_CODE (mby) != INTEGER_CST)
2588         return false;
2589
2590       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2591         return false;
2592
2593       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2594                               precision);
2595       return true;
2596
2597     case PLUS_EXPR:
2598     case MINUS_EXPR:
2599       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2600           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2601         return false;
2602
2603       if (code == MINUS_EXPR)
2604         p1 = double_int_neg (p1);
2605       *mul = double_int_sext (double_int_add (p0, p1), precision);
2606       return true;
2607
2608     case INTEGER_CST:
2609       if (TREE_CODE (bot) != INTEGER_CST)
2610         return false;
2611
2612       p0 = double_int_sext (tree_to_double_int (bot), precision);
2613       p1 = double_int_sext (tree_to_double_int (top), precision);
2614       if (double_int_zero_p (p1))
2615         return false;
2616       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2617                               precision);
2618       return double_int_zero_p (res);
2619
2620     default:
2621       return false;
2622     }
2623 }
2624
2625 /* Sets COMB to CST.  */
2626
2627 static void
2628 aff_combination_const (struct affine_tree_combination *comb, tree type,
2629                        unsigned HOST_WIDE_INT cst)
2630 {
2631   unsigned prec = TYPE_PRECISION (type);
2632
2633   comb->type = type;
2634   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2635
2636   comb->n = 0;
2637   comb->rest = NULL_TREE;
2638   comb->offset = cst & comb->mask;
2639 }
2640
2641 /* Sets COMB to single element ELT.  */
2642
2643 static void
2644 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2645 {
2646   unsigned prec = TYPE_PRECISION (type);
2647
2648   comb->type = type;
2649   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2650
2651   comb->n = 1;
2652   comb->elts[0] = elt;
2653   comb->coefs[0] = 1;
2654   comb->rest = NULL_TREE;
2655   comb->offset = 0;
2656 }
2657
2658 /* Scales COMB by SCALE.  */
2659
2660 static void
2661 aff_combination_scale (struct affine_tree_combination *comb,
2662                        unsigned HOST_WIDE_INT scale)
2663 {
2664   unsigned i, j;
2665
2666   if (scale == 1)
2667     return;
2668
2669   if (scale == 0)
2670     {
2671       aff_combination_const (comb, comb->type, 0);
2672       return;
2673     }
2674
2675   comb->offset = (scale * comb->offset) & comb->mask;
2676   for (i = 0, j = 0; i < comb->n; i++)
2677     {
2678       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2679       comb->elts[j] = comb->elts[i];
2680       if (comb->coefs[j] != 0)
2681         j++;
2682     }
2683   comb->n = j;
2684
2685   if (comb->rest)
2686     {
2687       if (comb->n < MAX_AFF_ELTS)
2688         {
2689           comb->coefs[comb->n] = scale;
2690           comb->elts[comb->n] = comb->rest;
2691           comb->rest = NULL_TREE;
2692           comb->n++;
2693         }
2694       else
2695         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2696                                   build_int_cst_type (comb->type, scale));
2697     }
2698 }
2699
2700 /* Adds ELT * SCALE to COMB.  */
2701
2702 static void
2703 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2704                          unsigned HOST_WIDE_INT scale)
2705 {
2706   unsigned i;
2707
2708   if (scale == 0)
2709     return;
2710
2711   for (i = 0; i < comb->n; i++)
2712     if (operand_equal_p (comb->elts[i], elt, 0))
2713       {
2714         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2715         if (comb->coefs[i])
2716           return;
2717
2718         comb->n--;
2719         comb->coefs[i] = comb->coefs[comb->n];
2720         comb->elts[i] = comb->elts[comb->n];
2721
2722         if (comb->rest)
2723           {
2724             gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2725             comb->coefs[comb->n] = 1;
2726             comb->elts[comb->n] = comb->rest;
2727             comb->rest = NULL_TREE;
2728             comb->n++;
2729           }
2730         return;
2731       }
2732   if (comb->n < MAX_AFF_ELTS)
2733     {
2734       comb->coefs[comb->n] = scale;
2735       comb->elts[comb->n] = elt;
2736       comb->n++;
2737       return;
2738     }
2739
2740   if (scale == 1)
2741     elt = fold_convert (comb->type, elt);
2742   else
2743     elt = fold_build2 (MULT_EXPR, comb->type,
2744                        fold_convert (comb->type, elt),
2745                        build_int_cst_type (comb->type, scale));
2746
2747   if (comb->rest)
2748     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2749   else
2750     comb->rest = elt;
2751 }
2752
2753 /* Adds COMB2 to COMB1.  */
2754
2755 static void
2756 aff_combination_add (struct affine_tree_combination *comb1,
2757                      struct affine_tree_combination *comb2)
2758 {
2759   unsigned i;
2760
2761   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2762   for (i = 0; i < comb2->n; i++)
2763     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2764   if (comb2->rest)
2765     aff_combination_add_elt (comb1, comb2->rest, 1);
2766 }
2767
2768 /* Convert COMB to TYPE.  */
2769
2770 static void
2771 aff_combination_convert (tree type, struct affine_tree_combination *comb)
2772 {
2773   unsigned prec = TYPE_PRECISION (type);
2774   unsigned i;
2775
2776   /* If the precision of both types is the same, it suffices to change the type
2777      of the whole combination -- the elements are allowed to have another type
2778      equivalent wrto STRIP_NOPS.  */
2779   if (prec == TYPE_PRECISION (comb->type))
2780     {
2781       comb->type = type;
2782       return;
2783     }
2784
2785   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2786   comb->offset = comb->offset & comb->mask;
2787
2788   /* The type of the elements can be different from comb->type only as
2789      much as what STRIP_NOPS would remove.  We can just directly cast
2790      to TYPE.  */
2791   for (i = 0; i < comb->n; i++)
2792     comb->elts[i] = fold_convert (type, comb->elts[i]);
2793   if (comb->rest)
2794     comb->rest = fold_convert (type, comb->rest);
2795
2796   comb->type = type;
2797 }
2798
2799 /* Splits EXPR into an affine combination of parts.  */
2800
2801 static void
2802 tree_to_aff_combination (tree expr, tree type,
2803                          struct affine_tree_combination *comb)
2804 {
2805   struct affine_tree_combination tmp;
2806   enum tree_code code;
2807   tree cst, core, toffset;
2808   HOST_WIDE_INT bitpos, bitsize;
2809   enum machine_mode mode;
2810   int unsignedp, volatilep;
2811
2812   STRIP_NOPS (expr);
2813
2814   code = TREE_CODE (expr);
2815   switch (code)
2816     {
2817     case INTEGER_CST:
2818       aff_combination_const (comb, type, int_cst_value (expr));
2819       return;
2820
2821     case PLUS_EXPR:
2822     case MINUS_EXPR:
2823       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2824       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2825       if (code == MINUS_EXPR)
2826         aff_combination_scale (&tmp, -1);
2827       aff_combination_add (comb, &tmp);
2828       return;
2829
2830     case MULT_EXPR:
2831       cst = TREE_OPERAND (expr, 1);
2832       if (TREE_CODE (cst) != INTEGER_CST)
2833         break;
2834       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2835       aff_combination_scale (comb, int_cst_value (cst));
2836       return;
2837
2838     case NEGATE_EXPR:
2839       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2840       aff_combination_scale (comb, -1);
2841       return;
2842
2843     case ADDR_EXPR:
2844       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2845                                   &toffset, &mode, &unsignedp, &volatilep,
2846                                   false);
2847       if (bitpos % BITS_PER_UNIT != 0)
2848         break;
2849       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2850       core = build_fold_addr_expr (core);
2851       if (TREE_CODE (core) == ADDR_EXPR)
2852         aff_combination_add_elt (comb, core, 1);
2853       else
2854         {
2855           tree_to_aff_combination (core, type, &tmp);
2856           aff_combination_add (comb, &tmp);
2857         }
2858       if (toffset)
2859         {
2860           tree_to_aff_combination (toffset, type, &tmp);
2861           aff_combination_add (comb, &tmp);
2862         }
2863       return;
2864
2865     default:
2866       break;
2867     }
2868
2869   aff_combination_elt (comb, type, expr);
2870 }
2871
2872 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2873
2874 static tree
2875 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2876                  unsigned HOST_WIDE_INT mask)
2877 {
2878   enum tree_code code;
2879
2880   scale &= mask;
2881   elt = fold_convert (type, elt);
2882
2883   if (scale == 1)
2884     {
2885       if (!expr)
2886         return elt;
2887
2888       return fold_build2 (PLUS_EXPR, type, expr, elt);
2889     }
2890
2891   if (scale == mask)
2892     {
2893       if (!expr)
2894         return fold_build1 (NEGATE_EXPR, type, elt);
2895
2896       return fold_build2 (MINUS_EXPR, type, expr, elt);
2897     }
2898
2899   if (!expr)
2900     return fold_build2 (MULT_EXPR, type, elt,
2901                         build_int_cst_type (type, scale));
2902
2903   if ((scale | (mask >> 1)) == mask)
2904     {
2905       /* Scale is negative.  */
2906       code = MINUS_EXPR;
2907       scale = (-scale) & mask;
2908     }
2909   else
2910     code = PLUS_EXPR;
2911
2912   elt = fold_build2 (MULT_EXPR, type, elt,
2913                      build_int_cst_type (type, scale));
2914   return fold_build2 (code, type, expr, elt);
2915 }
2916
2917 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2918
2919 static void
2920 unshare_aff_combination (struct affine_tree_combination *comb)
2921 {
2922   unsigned i;
2923
2924   for (i = 0; i < comb->n; i++)
2925     comb->elts[i] = unshare_expr (comb->elts[i]);
2926   if (comb->rest)
2927     comb->rest = unshare_expr (comb->rest);
2928 }
2929
2930 /* Makes tree from the affine combination COMB.  */
2931
2932 static tree
2933 aff_combination_to_tree (struct affine_tree_combination *comb)
2934 {
2935   tree type = comb->type;
2936   tree expr = comb->rest;
2937   unsigned i;
2938   unsigned HOST_WIDE_INT off, sgn;
2939
2940   if (comb->n == 0 && comb->offset == 0)
2941     {
2942       if (expr)
2943         {
2944           /* Handle the special case produced by get_computation_aff when
2945              the type does not fit in HOST_WIDE_INT.  */
2946           return fold_convert (type, expr);
2947         }
2948       else
2949         return build_int_cst (type, 0);
2950     }
2951
2952   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2953
2954   for (i = 0; i < comb->n; i++)
2955     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2956                             comb->mask);
2957
2958   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2959     {
2960       /* Offset is negative.  */
2961       off = (-comb->offset) & comb->mask;
2962       sgn = comb->mask;
2963     }
2964   else
2965     {
2966       off = comb->offset;
2967       sgn = 1;
2968     }
2969   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2970                           comb->mask);
2971 }
2972
2973 /* Folds EXPR using the affine expressions framework.  */
2974
2975 static tree
2976 fold_affine_expr (tree expr)
2977 {
2978   tree type = TREE_TYPE (expr);
2979   struct affine_tree_combination comb;
2980
2981   if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2982     return expr;
2983
2984   tree_to_aff_combination (expr, type, &comb);
2985   return aff_combination_to_tree (&comb);
2986 }
2987
2988 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2989    same precision that is at least as wide as the precision of TYPE, stores
2990    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2991    type of A and B.  */
2992
2993 static tree
2994 determine_common_wider_type (tree *a, tree *b)
2995 {
2996   tree wider_type = NULL;
2997   tree suba, subb;
2998   tree atype = TREE_TYPE (*a);
2999
3000   if ((TREE_CODE (*a) == NOP_EXPR
3001        || TREE_CODE (*a) == CONVERT_EXPR))
3002     {
3003       suba = TREE_OPERAND (*a, 0);
3004       wider_type = TREE_TYPE (suba);
3005       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3006         return atype;
3007     }
3008   else
3009     return atype;
3010
3011   if ((TREE_CODE (*b) == NOP_EXPR
3012        || TREE_CODE (*b) == CONVERT_EXPR))
3013     {
3014       subb = TREE_OPERAND (*b, 0);
3015       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3016         return atype;
3017     }
3018   else
3019     return atype;
3020
3021   *a = suba;
3022   *b = subb;
3023   return wider_type;
3024 }
3025
3026 /* Determines the expression by that USE is expressed from induction variable
3027    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3028    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3029
3030 static bool
3031 get_computation_aff (struct loop *loop,
3032                      struct iv_use *use, struct iv_cand *cand, tree at,
3033                      struct affine_tree_combination *aff)
3034 {
3035   tree ubase = use->iv->base;
3036   tree ustep = use->iv->step;
3037   tree cbase = cand->iv->base;
3038   tree cstep = cand->iv->step;
3039   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3040   tree common_type;
3041   tree uutype;
3042   tree expr, delta;
3043   tree ratio;
3044   unsigned HOST_WIDE_INT ustepi, cstepi;
3045   HOST_WIDE_INT ratioi;
3046   struct affine_tree_combination cbase_aff, expr_aff;
3047   tree cstep_orig = cstep, ustep_orig = ustep;
3048   double_int rat;
3049
3050   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3051     {
3052       /* We do not have a precision to express the values of use.  */
3053       return false;
3054     }
3055
3056   expr = var_at_stmt (loop, cand, at);
3057
3058   if (TREE_TYPE (expr) != ctype)
3059     {
3060       /* This may happen with the original ivs.  */
3061       expr = fold_convert (ctype, expr);
3062     }
3063
3064   if (TYPE_UNSIGNED (utype))
3065     uutype = utype;
3066   else
3067     {
3068       uutype = unsigned_type_for (utype);
3069       ubase = fold_convert (uutype, ubase);
3070       ustep = fold_convert (uutype, ustep);
3071     }
3072
3073   if (uutype != ctype)
3074     {
3075       expr = fold_convert (uutype, expr);
3076       cbase = fold_convert (uutype, cbase);
3077       cstep = fold_convert (uutype, cstep);
3078
3079       /* If the conversion is not noop, we must take it into account when
3080          considering the value of the step.  */
3081       if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3082         cstep_orig = cstep;
3083     }
3084
3085   if (cst_and_fits_in_hwi (cstep_orig)
3086       && cst_and_fits_in_hwi (ustep_orig))
3087     {
3088       ustepi = int_cst_value (ustep_orig);
3089       cstepi = int_cst_value (cstep_orig);
3090
3091       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3092         {
3093           /* TODO maybe consider case when ustep divides cstep and the ratio is
3094              a power of 2 (so that the division is fast to execute)?  We would
3095              need to be much more careful with overflows etc. then.  */
3096           return false;
3097         }
3098
3099       ratio = build_int_cst_type (uutype, ratioi);
3100     }
3101   else
3102     {
3103       if (!constant_multiple_of (ustep_orig, cstep_orig, &rat))
3104         return false;
3105       ratio = double_int_to_tree (uutype, rat);
3106
3107       /* Ratioi is only used to detect special cases when the multiplicative
3108          factor is 1 or -1, so if rat does not fit to HOST_WIDE_INT, we may
3109          set it to 0.  */
3110       if (double_int_fits_in_shwi_p (rat))
3111         ratioi = double_int_to_shwi (rat);
3112       else
3113         ratioi = 0;
3114     }
3115
3116   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3117      type, we achieve better folding by computing their difference in this
3118      wider type, and cast the result to UUTYPE.  We do not need to worry about
3119      overflows, as all the arithmetics will in the end be performed in UUTYPE
3120      anyway.  */
3121   common_type = determine_common_wider_type (&ubase, &cbase);
3122
3123   /* We may need to shift the value if we are after the increment.  */
3124   if (stmt_after_increment (loop, cand, at))
3125     {
3126       if (uutype != common_type)
3127         cstep = fold_convert (common_type, cstep);
3128       cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
3129     }
3130
3131   /* use = ubase - ratio * cbase + ratio * var.
3132
3133      In general case ubase + ratio * (var - cbase) could be better (one less
3134      multiplication), but often it is possible to eliminate redundant parts
3135      of computations from (ubase - ratio * cbase) term, and if it does not
3136      happen, fold is able to apply the distributive law to obtain this form
3137      anyway.  */
3138
3139   if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
3140     {
3141       /* Let's compute in trees and just return the result in AFF.  This case
3142          should not be very common, and fold itself is not that bad either,
3143          so making the aff. functions more complicated to handle this case
3144          is not that urgent.  */
3145       if (ratioi == 1)
3146         {
3147           delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
3148           if (uutype != common_type)
3149             delta = fold_convert (uutype, delta);
3150           expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3151         }
3152       else if (ratioi == -1)
3153         {
3154           delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
3155           if (uutype != common_type)
3156             delta = fold_convert (uutype, delta);
3157           expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3158         }
3159       else
3160         {
3161           delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
3162           delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
3163           if (uutype != common_type)
3164             delta = fold_convert (uutype, delta);
3165           expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3166           expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3167         }
3168
3169       aff->type = uutype;
3170       aff->n = 0;
3171       aff->offset = 0;
3172       aff->mask = 0;
3173       aff->rest = expr;
3174       return true;
3175     }
3176
3177   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3178      possible to compute ratioi.  */
3179   gcc_assert (ratioi);
3180
3181   tree_to_aff_combination (ubase, common_type, aff);
3182   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3183   tree_to_aff_combination (expr, uutype, &expr_aff);
3184   aff_combination_scale (&cbase_aff, -ratioi);
3185   aff_combination_scale (&expr_aff, ratioi);
3186   aff_combination_add (aff, &cbase_aff);
3187   if (common_type != uutype)
3188     aff_combination_convert (uutype, aff);
3189   aff_combination_add (aff, &expr_aff);
3190
3191   return true;
3192 }
3193
3194 /* Determines the expression by that USE is expressed from induction variable
3195    CAND at statement AT in LOOP.  The computation is unshared.  */
3196
3197 static tree
3198 get_computation_at (struct loop *loop,
3199                     struct iv_use *use, struct iv_cand *cand, tree at)
3200 {
3201   struct affine_tree_combination aff;
3202   tree type = TREE_TYPE (use->iv->base);
3203
3204   if (!get_computation_aff (loop, use, cand, at, &aff))
3205     return NULL_TREE;
3206   unshare_aff_combination (&aff);
3207   return fold_convert (type, aff_combination_to_tree (&aff));
3208 }
3209
3210 /* Determines the expression by that USE is expressed from induction variable
3211    CAND in LOOP.  The computation is unshared.  */
3212
3213 static tree
3214 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3215 {
3216   return get_computation_at (loop, use, cand, use->stmt);
3217 }
3218
3219 /* Returns cost of addition in MODE.  */
3220
3221 static unsigned
3222 add_cost (enum machine_mode mode)
3223 {
3224   static unsigned costs[NUM_MACHINE_MODES];
3225   rtx seq;
3226   unsigned cost;
3227
3228   if (costs[mode])
3229     return costs[mode];
3230
3231   start_sequence ();
3232   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3233                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3234                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3235                  NULL_RTX);
3236   seq = get_insns ();
3237   end_sequence ();
3238
3239   cost = seq_cost (seq);
3240   if (!cost)
3241     cost = 1;
3242
3243   costs[mode] = cost;
3244
3245   if (dump_file && (dump_flags & TDF_DETAILS))
3246     fprintf (dump_file, "Addition in %s costs %d\n",
3247              GET_MODE_NAME (mode), cost);
3248   return cost;
3249 }
3250
3251 /* Entry in a hashtable of already known costs for multiplication.  */
3252 struct mbc_entry
3253 {
3254   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3255   enum machine_mode mode;       /* In mode.  */
3256   unsigned cost;                /* The cost.  */
3257 };
3258
3259 /* Counts hash value for the ENTRY.  */
3260
3261 static hashval_t
3262 mbc_entry_hash (const void *entry)
3263 {
3264   const struct mbc_entry *e = entry;
3265
3266   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3267 }
3268
3269 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3270
3271 static int
3272 mbc_entry_eq (const void *entry1, const void *entry2)
3273 {
3274   const struct mbc_entry *e1 = entry1;
3275   const struct mbc_entry *e2 = entry2;
3276
3277   return (e1->mode == e2->mode
3278           && e1->cst == e2->cst);
3279 }
3280
3281 /* Returns cost of multiplication by constant CST in MODE.  */
3282
3283 unsigned
3284 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3285 {
3286   static htab_t costs;
3287   struct mbc_entry **cached, act;
3288   rtx seq;
3289   unsigned cost;
3290
3291   if (!costs)
3292     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3293
3294   act.mode = mode;
3295   act.cst = cst;
3296   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3297   if (*cached)
3298     return (*cached)->cost;
3299
3300   *cached = XNEW (struct mbc_entry);
3301   (*cached)->mode = mode;
3302   (*cached)->cst = cst;
3303
3304   start_sequence ();
3305   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3306                gen_int_mode (cst, mode), NULL_RTX, 0);
3307   seq = get_insns ();
3308   end_sequence ();
3309
3310   cost = seq_cost (seq);
3311
3312   if (dump_file && (dump_flags & TDF_DETAILS))
3313     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3314              (int) cst, GET_MODE_NAME (mode), cost);
3315
3316   (*cached)->cost = cost;
3317
3318   return cost;
3319 }
3320
3321 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3322    validity for a memory reference accessing memory of mode MODE.  */
3323
3324 bool
3325 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
3326 {
3327 #define MAX_RATIO 128
3328   static sbitmap valid_mult[MAX_MACHINE_MODE];
3329
3330   if (!valid_mult[mode])
3331     {
3332       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3333       rtx addr;
3334       HOST_WIDE_INT i;
3335
3336       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
3337       sbitmap_zero (valid_mult[mode]);
3338       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3339       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3340         {
3341           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3342           if (memory_address_p (mode, addr))
3343             SET_BIT (valid_mult[mode], i + MAX_RATIO);
3344         }
3345
3346       if (dump_file && (dump_flags & TDF_DETAILS))
3347         {
3348           fprintf (dump_file, "  allowed multipliers:");
3349           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3350             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
3351               fprintf (dump_file, " %d", (int) i);
3352           fprintf (dump_file, "\n");
3353           fprintf (dump_file, "\n");
3354         }
3355     }
3356
3357   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3358     return false;
3359
3360   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
3361 }
3362
3363 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3364    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3365    variable is omitted.  Compute the cost for a memory reference that accesses
3366    a memory location of mode MEM_MODE.
3367
3368    TODO -- there must be some better way.  This all is quite crude.  */
3369
3370 static unsigned
3371 get_address_cost (bool symbol_present, bool var_present,
3372                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3373                   enum machine_mode mem_mode)
3374 {
3375   static bool initialized[MAX_MACHINE_MODE];
3376   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3377   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3378   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3379   unsigned cost, acost;
3380   bool offset_p, ratio_p;
3381   HOST_WIDE_INT s_offset;
3382   unsigned HOST_WIDE_INT mask;
3383   unsigned bits;
3384
3385   if (!initialized[mem_mode])
3386     {
3387       HOST_WIDE_INT i;
3388       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3389       int old_cse_not_expected;
3390       unsigned sym_p, var_p, off_p, rat_p, add_c;
3391       rtx seq, addr, base;
3392       rtx reg0, reg1;
3393
3394       initialized[mem_mode] = true;
3395
3396       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3397
3398       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3399       for (i = start; i <= 1 << 20; i <<= 1)
3400         {
3401           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3402           if (!memory_address_p (mem_mode, addr))
3403             break;
3404         }
3405       max_offset[mem_mode] = i == start ? 0 : i >> 1;
3406       off[mem_mode] = max_offset[mem_mode];
3407
3408       for (i = start; i <= 1 << 20; i <<= 1)
3409         {
3410           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3411           if (!memory_address_p (mem_mode, addr))
3412             break;
3413         }
3414       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3415
3416       if (dump_file && (dump_flags & TDF_DETAILS))
3417         {
3418           fprintf (dump_file, "get_address_cost:\n");
3419           fprintf (dump_file, "  min offset %s %d\n",
3420                    GET_MODE_NAME (mem_mode),
3421                    (int) min_offset[mem_mode]);
3422           fprintf (dump_file, "  max offset %s %d\n",
3423                    GET_MODE_NAME (mem_mode),
3424                    (int) max_offset[mem_mode]);
3425         }
3426
3427       rat[mem_mode] = 1;
3428       for (i = 2; i <= MAX_RATIO; i++)
3429         if (multiplier_allowed_in_address_p (i, mem_mode))
3430           {
3431             rat[mem_mode] = i;
3432             break;
3433           }
3434
3435       /* Compute the cost of various addressing modes.  */
3436       acost = 0;
3437       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3438       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3439
3440       for (i = 0; i < 16; i++)
3441         {
3442           sym_p = i & 1;
3443           var_p = (i >> 1) & 1;
3444           off_p = (i >> 2) & 1;
3445           rat_p = (i >> 3) & 1;
3446
3447           addr = reg0;
3448           if (rat_p)
3449             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3450                                    gen_int_mode (rat[mem_mode], Pmode));
3451
3452           if (var_p)
3453             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3454
3455           if (sym_p)
3456             {
3457               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3458               if (off_p)
3459                 base = gen_rtx_fmt_e (CONST, Pmode,
3460                                       gen_rtx_fmt_ee (PLUS, Pmode,
3461                                                       base,
3462                                                       gen_int_mode (off[mem_mode],
3463                                                                     Pmode)));
3464             }
3465           else if (off_p)
3466             base = gen_int_mode (off[mem_mode], Pmode);
3467           else
3468             base = NULL_RTX;
3469
3470           if (base)
3471             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3472
3473           start_sequence ();
3474           /* To avoid splitting addressing modes, pretend that no cse will
3475              follow.  */
3476           old_cse_not_expected = cse_not_expected;
3477           cse_not_expected = true;
3478           addr = memory_address (mem_mode, addr);
3479           cse_not_expected = old_cse_not_expected;
3480           seq = get_insns ();
3481           end_sequence ();
3482
3483           acost = seq_cost (seq);
3484           acost += address_cost (addr, mem_mode);
3485
3486           if (!acost)
3487             acost = 1;
3488           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3489         }
3490
3491       /* On some targets, it is quite expensive to load symbol to a register,
3492          which makes addresses that contain symbols look much more expensive.
3493          However, the symbol will have to be loaded in any case before the
3494          loop (and quite likely we have it in register already), so it does not
3495          make much sense to penalize them too heavily.  So make some final
3496          tweaks for the SYMBOL_PRESENT modes:
3497
3498          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3499          var is cheaper, use this mode with small penalty.
3500          If VAR_PRESENT is true, try whether the mode with
3501          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3502          if this is the case, use it.  */
3503       add_c = add_cost (Pmode);
3504       for (i = 0; i < 8; i++)
3505         {
3506           var_p = i & 1;
3507           off_p = (i >> 1) & 1;
3508           rat_p = (i >> 2) & 1;
3509
3510           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3511           if (var_p)
3512             acost += add_c;
3513
3514           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3515             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3516         }
3517
3518       if (dump_file && (dump_flags & TDF_DETAILS))
3519         {
3520           fprintf (dump_file, "Address costs:\n");
3521
3522           for (i = 0; i < 16; i++)
3523             {
3524               sym_p = i & 1;
3525               var_p = (i >> 1) & 1;
3526               off_p = (i >> 2) & 1;
3527               rat_p = (i >> 3) & 1;
3528
3529               fprintf (dump_file, "  ");
3530               if (sym_p)
3531                 fprintf (dump_file, "sym + ");
3532               if (var_p)
3533                 fprintf (dump_file, "var + ");
3534               if (off_p)
3535                 fprintf (dump_file, "cst + ");
3536               if (rat_p)
3537                 fprintf (dump_file, "rat * ");
3538
3539               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3540               fprintf (dump_file, "index costs %d\n", acost);
3541             }
3542           fprintf (dump_file, "\n");
3543         }
3544     }
3545
3546   bits = GET_MODE_BITSIZE (Pmode);
3547   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3548   offset &= mask;
3549   if ((offset >> (bits - 1) & 1))
3550     offset |= ~mask;
3551   s_offset = offset;
3552
3553   cost = 0;
3554   offset_p = (s_offset != 0
3555               && min_offset[mem_mode] <= s_offset
3556               && s_offset <= max_offset[mem_mode]);
3557   ratio_p = (ratio != 1
3558              && multiplier_allowed_in_address_p (ratio, mem_mode));
3559
3560   if (ratio != 1 && !ratio_p)
3561     cost += multiply_by_cost (ratio, Pmode);
3562
3563   if (s_offset && !offset_p && !symbol_present)
3564     {
3565       cost += add_cost (Pmode);
3566       var_present = true;
3567     }
3568
3569   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3570   return cost + acost;
3571 }
3572
3573 /* Estimates cost of forcing expression EXPR into a variable.  */
3574
3575 unsigned
3576 force_expr_to_var_cost (tree expr)
3577 {
3578   static bool costs_initialized = false;
3579   static unsigned integer_cost;
3580   static unsigned symbol_cost;
3581   static unsigned address_cost;
3582   tree op0, op1;
3583   unsigned cost0, cost1, cost;
3584   enum machine_mode mode;
3585
3586   if (!costs_initialized)
3587     {
3588       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3589       rtx x = gen_rtx_MEM (DECL_MODE (var),
3590                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3591       tree addr;
3592       tree type = build_pointer_type (integer_type_node);
3593
3594       integer_cost = computation_cost (build_int_cst (integer_type_node,
3595                                                       2000));
3596
3597       SET_DECL_RTL (var, x);
3598       TREE_STATIC (var) = 1;
3599       addr = build1 (ADDR_EXPR, type, var);
3600       symbol_cost = computation_cost (addr) + 1;
3601
3602       address_cost
3603         = computation_cost (build2 (PLUS_EXPR, type,
3604                                     addr,
3605                                     build_int_cst (type, 2000))) + 1;
3606       if (dump_file && (dump_flags & TDF_DETAILS))
3607         {
3608           fprintf (dump_file, "force_expr_to_var_cost:\n");
3609           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3610           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3611           fprintf (dump_file, "  address %d\n", (int) address_cost);
3612           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3613           fprintf (dump_file, "\n");
3614         }
3615
3616       costs_initialized = true;
3617     }
3618
3619   STRIP_NOPS (expr);
3620
3621   if (SSA_VAR_P (expr))
3622     return 0;
3623
3624   if (TREE_INVARIANT (expr))
3625     {
3626       if (TREE_CODE (expr) == INTEGER_CST)
3627         return integer_cost;
3628
3629       if (TREE_CODE (expr) == ADDR_EXPR)
3630         {
3631           tree obj = TREE_OPERAND (expr, 0);
3632
3633           if (TREE_CODE (obj) == VAR_DECL
3634               || TREE_CODE (obj) == PARM_DECL
3635               || TREE_CODE (obj) == RESULT_DECL)
3636             return symbol_cost;
3637         }
3638
3639       return address_cost;
3640     }
3641
3642   switch (TREE_CODE (expr))
3643     {
3644     case PLUS_EXPR:
3645     case MINUS_EXPR:
3646     case MULT_EXPR:
3647       op0 = TREE_OPERAND (expr, 0);
3648       op1 = TREE_OPERAND (expr, 1);
3649       STRIP_NOPS (op0);
3650       STRIP_NOPS (op1);
3651
3652       if (is_gimple_val (op0))
3653         cost0 = 0;
3654       else
3655         cost0 = force_expr_to_var_cost (op0);
3656
3657       if (is_gimple_val (op1))
3658         cost1 = 0;
3659       else
3660         cost1 = force_expr_to_var_cost (op1);
3661
3662       break;
3663
3664     default:
3665       /* Just an arbitrary value, FIXME.  */
3666       return target_spill_cost;
3667     }
3668
3669   mode = TYPE_MODE (TREE_TYPE (expr));
3670   switch (TREE_CODE (expr))
3671     {
3672     case PLUS_EXPR:
3673     case MINUS_EXPR:
3674       cost = add_cost (mode);
3675       break;
3676
3677     case MULT_EXPR:
3678       if (cst_and_fits_in_hwi (op0))
3679         cost = multiply_by_cost (int_cst_value (op0), mode);
3680       else if (cst_and_fits_in_hwi (op1))
3681         cost = multiply_by_cost (int_cst_value (op1), mode);
3682       else
3683         return target_spill_cost;
3684       break;
3685
3686     default:
3687       gcc_unreachable ();
3688     }
3689
3690   cost += cost0;
3691   cost += cost1;
3692
3693   /* Bound the cost by target_spill_cost.  The parts of complicated
3694      computations often are either loop invariant or at least can
3695      be shared between several iv uses, so letting this grow without
3696      limits would not give reasonable results.  */
3697   return cost < target_spill_cost ? cost : target_spill_cost;
3698 }
3699
3700 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3701    invariants the computation depends on.  */
3702
3703 static unsigned
3704 force_var_cost (struct ivopts_data *data,
3705                 tree expr, bitmap *depends_on)
3706 {
3707   if (depends_on)
3708     {
3709       fd_ivopts_data = data;
3710       walk_tree (&expr, find_depends, depends_on, NULL);
3711     }
3712
3713   return force_expr_to_var_cost (expr);
3714 }
3715
3716 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3717    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3718    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3719    invariants the computation depends on.  */
3720
3721 static unsigned
3722 split_address_cost (struct ivopts_data *data,
3723                     tree addr, bool *symbol_present, bool *var_present,
3724                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3725 {
3726   tree core;
3727   HOST_WIDE_INT bitsize;
3728   HOST_WIDE_INT bitpos;
3729   tree toffset;
3730   enum machine_mode mode;
3731   int unsignedp, volatilep;
3732
3733   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3734                               &unsignedp, &volatilep, false);
3735
3736   if (toffset != 0
3737       || bitpos % BITS_PER_UNIT != 0
3738       || TREE_CODE (core) != VAR_DECL)
3739     {
3740       *symbol_present = false;
3741       *var_present = true;
3742       fd_ivopts_data = data;
3743       walk_tree (&addr, find_depends, depends_on, NULL);
3744       return target_spill_cost;
3745     }
3746
3747   *offset += bitpos / BITS_PER_UNIT;
3748   if (TREE_STATIC (core)
3749       || DECL_EXTERNAL (core))
3750     {
3751       *symbol_present = true;
3752       *var_present = false;
3753       return 0;
3754     }
3755
3756   *symbol_present = false;
3757   *var_present = true;
3758   return 0;
3759 }
3760
3761 /* Estimates cost of expressing difference of addresses E1 - E2 as
3762    var + symbol + offset.  The value of offset is added to OFFSET,
3763    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3764    part is missing.  DEPENDS_ON is a set of the invariants the computation
3765    depends on.  */
3766
3767 static unsigned
3768 ptr_difference_cost (struct ivopts_data *data,
3769                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3770                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3771 {
3772   HOST_WIDE_INT diff = 0;
3773   unsigned cost;
3774
3775   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3776
3777   if (ptr_difference_const (e1, e2, &diff))
3778     {
3779       *offset += diff;
3780       *symbol_present = false;
3781       *var_present = false;
3782       return 0;
3783     }
3784
3785   if (e2 == integer_zero_node)
3786     return split_address_cost (data, TREE_OPERAND (e1, 0),
3787                                symbol_present, var_present, offset, depends_on);
3788
3789   *symbol_present = false;
3790   *var_present = true;
3791
3792   cost = force_var_cost (data, e1, depends_on);
3793   cost += force_var_cost (data, e2, depends_on);
3794   cost += add_cost (Pmode);
3795
3796   return cost;
3797 }
3798
3799 /* Estimates cost of expressing difference E1 - E2 as
3800    var + symbol + offset.  The value of offset is added to OFFSET,
3801    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3802    part is missing.  DEPENDS_ON is a set of the invariants the computation
3803    depends on.  */
3804
3805 static unsigned
3806 difference_cost (struct ivopts_data *data,
3807                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3808                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3809 {
3810   unsigned cost;
3811   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3812   unsigned HOST_WIDE_INT off1, off2;
3813
3814   e1 = strip_offset (e1, &off1);
3815   e2 = strip_offset (e2, &off2);
3816   *offset += off1 - off2;
3817
3818   STRIP_NOPS (e1);
3819   STRIP_NOPS (e2);
3820
3821   if (TREE_CODE (e1) == ADDR_EXPR)
3822     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3823                                 depends_on);
3824   *symbol_present = false;
3825
3826   if (operand_equal_p (e1, e2, 0))
3827     {
3828       *var_present = false;
3829       return 0;
3830     }
3831   *var_present = true;
3832   if (zero_p (e2))
3833     return force_var_cost (data, e1, depends_on);
3834
3835   if (zero_p (e1))
3836     {
3837       cost = force_var_cost (data, e2, depends_on);
3838       cost += multiply_by_cost (-1, mode);
3839
3840       return cost;
3841     }
3842
3843   cost = force_var_cost (data, e1, depends_on);
3844   cost += force_var_cost (data, e2, depends_on);
3845   cost += add_cost (mode);
3846
3847   return cost;
3848 }
3849
3850 /* Determines the cost of the computation by that USE is expressed
3851    from induction variable CAND.  If ADDRESS_P is true, we just need
3852    to create an address from it, otherwise we want to get it into
3853    register.  A set of invariants we depend on is stored in
3854    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3855
3856 static unsigned
3857 get_computation_cost_at (struct ivopts_data *data,
3858                          struct iv_use *use, struct iv_cand *cand,
3859                          bool address_p, bitmap *depends_on, tree at)
3860 {
3861   tree ubase = use->iv->base, ustep = use->iv->step;
3862   tree cbase, cstep;
3863   tree utype = TREE_TYPE (ubase), ctype;
3864   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3865   HOST_WIDE_INT ratio, aratio;
3866   bool var_present, symbol_present;
3867   unsigned cost = 0, n_sums;
3868
3869   *depends_on = NULL;
3870
3871   /* Only consider real candidates.  */
3872   if (!cand->iv)
3873     return INFTY;
3874
3875   cbase = cand->iv->base;
3876   cstep = cand->iv->step;
3877   ctype = TREE_TYPE (cbase);
3878
3879   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3880     {
3881       /* We do not have a precision to express the values of use.  */
3882       return INFTY;
3883     }
3884
3885   if (address_p)
3886     {
3887       /* Do not try to express address of an object with computation based
3888          on address of a different object.  This may cause problems in rtl
3889          level alias analysis (that does not expect this to be happening,
3890          as this is illegal in C), and would be unlikely to be useful
3891          anyway.  */
3892       if (use->iv->base_object
3893           && cand->iv->base_object
3894           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3895         return INFTY;
3896     }
3897
3898   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3899     {
3900       /* TODO -- add direct handling of this case.  */
3901       goto fallback;
3902     }
3903
3904   /* CSTEPI is removed from the offset in case statement is after the
3905      increment.  If the step is not constant, we use zero instead.
3906      This is a bit imprecise (there is the extra addition), but
3907      redundancy elimination is likely to transform the code so that
3908      it uses value of the variable before increment anyway,
3909      so it is not that much unrealistic.  */
3910   if (cst_and_fits_in_hwi (cstep))
3911     cstepi = int_cst_value (cstep);
3912   else
3913     cstepi = 0;
3914
3915   if (cst_and_fits_in_hwi (ustep)
3916       && cst_and_fits_in_hwi (cstep))
3917     {
3918       ustepi = int_cst_value (ustep);
3919
3920       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3921         return INFTY;
3922     }
3923   else
3924     {
3925       double_int rat;
3926
3927       if (!constant_multiple_of (ustep, cstep, &rat))
3928         return INFTY;
3929
3930       if (double_int_fits_in_shwi_p (rat))
3931         ratio = double_int_to_shwi (rat);
3932       else
3933         return INFTY;
3934     }
3935
3936   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3937      or ratio == 1, it is better to handle this like
3938
3939      ubase - ratio * cbase + ratio * var
3940
3941      (also holds in the case ratio == -1, TODO.  */
3942
3943   if (cst_and_fits_in_hwi (cbase))
3944     {
3945       offset = - ratio * int_cst_value (cbase);
3946       cost += difference_cost (data,
3947                                ubase, integer_zero_node,
3948                                &symbol_present, &var_present, &offset,
3949                                depends_on);
3950     }
3951   else if (ratio == 1)
3952     {
3953       cost += difference_cost (data,
3954                                ubase, cbase,
3955                                &symbol_present, &var_present, &offset,
3956                                depends_on);
3957     }
3958   else
3959     {
3960       cost += force_var_cost (data, cbase, depends_on);
3961       cost += add_cost (TYPE_MODE (ctype));
3962       cost += difference_cost (data,
3963                                ubase, integer_zero_node,
3964                                &symbol_present, &var_present, &offset,
3965                                depends_on);
3966     }
3967
3968   /* If we are after the increment, the value of the candidate is higher by
3969      one iteration.  */
3970   if (stmt_after_increment (data->current_loop, cand, at))
3971     offset -= ratio * cstepi;
3972
3973   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3974      (symbol/var/const parts may be omitted).  If we are looking for an address,
3975      find the cost of addressing this.  */
3976   if (address_p)
3977     return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3978                                     TYPE_MODE (TREE_TYPE (*use->op_p)));
3979
3980   /* Otherwise estimate the costs for computing the expression.  */
3981   aratio = ratio > 0 ? ratio : -ratio;
3982   if (!symbol_present && !var_present && !offset)
3983     {
3984       if (ratio != 1)
3985         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3986
3987       return cost;
3988     }
3989
3990   if (aratio != 1)
3991     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3992
3993   n_sums = 1;
3994   if (var_present
3995       /* Symbol + offset should be compile-time computable.  */
3996       && (symbol_present || offset))
3997     n_sums++;
3998
3999   return cost + n_sums * add_cost (TYPE_MODE (ctype));
4000
4001 fallback:
4002   {
4003     /* Just get the expression, expand it and measure the cost.  */
4004     tree comp = get_computation_at (data->current_loop, use, cand, at);
4005
4006     if (!comp)
4007       return INFTY;
4008
4009     if (address_p)
4010       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
4011
4012     return computation_cost (comp);
4013   }
4014 }
4015
4016 /* Determines the cost of the computation by that USE is expressed
4017    from induction variable CAND.  If ADDRESS_P is true, we just need
4018    to create an address from it, otherwise we want to get it into
4019    register.  A set of invariants we depend on is stored in
4020    DEPENDS_ON.  */
4021
4022 static unsigned
4023 get_computation_cost (struct ivopts_data *data,
4024                       struct iv_use *use, struct iv_cand *cand,
4025                       bool address_p, bitmap *depends_on)
4026 {
4027   return get_computation_cost_at (data,
4028                                   use, cand, address_p, depends_on, use->stmt);
4029 }
4030
4031 /* Determines cost of basing replacement of USE on CAND in a generic
4032    expression.  */
4033
4034 static bool
4035 determine_use_iv_cost_generic (struct ivopts_data *data,
4036                                struct iv_use *use, struct iv_cand *cand)
4037 {
4038   bitmap depends_on;
4039   unsigned cost;
4040
4041   /* The simple case first -- if we need to express value of the preserved
4042      original biv, the cost is 0.  This also prevents us from counting the
4043      cost of increment twice -- once at this use and once in the cost of
4044      the candidate.  */
4045   if (cand->pos == IP_ORIGINAL
4046       && cand->incremented_at == use->stmt)
4047     {
4048       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4049       return true;
4050     }
4051
4052   cost = get_computation_cost (data, use, cand, false, &depends_on);
4053   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4054
4055   return cost != INFTY;
4056 }
4057
4058 /* Determines cost of basing replacement of USE on CAND in an address.  */
4059
4060 static bool
4061 determine_use_iv_cost_address (struct ivopts_data *data,
4062                                struct iv_use *use, struct iv_cand *cand)
4063 {
4064   bitmap depends_on;
4065   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
4066
4067   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4068
4069   return cost != INFTY;
4070 }
4071
4072 /* Computes value of induction variable IV in iteration NITER.  */
4073
4074 static tree
4075 iv_value (struct iv *iv, tree niter)
4076 {
4077   tree val;
4078   tree type = TREE_TYPE (iv->base);
4079
4080   niter = fold_convert (type, niter);
4081   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
4082
4083   return fold_build2 (PLUS_EXPR, type, iv->base, val);
4084 }
4085
4086 /* Computes value of candidate CAND at position AT in iteration NITER.  */
4087
4088 static tree
4089 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
4090 {
4091   tree val = iv_value (cand->iv, niter);
4092   tree type = TREE_TYPE (cand->iv->base);
4093
4094   if (stmt_after_increment (loop, cand, at))
4095     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
4096
4097   return val;
4098 }
4099
4100 /* Returns period of induction variable iv.  */
4101
4102 static tree
4103 iv_period (struct iv *iv)
4104 {
4105   tree step = iv->step, period, type;
4106   tree pow2div;
4107
4108   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4109
4110   /* Period of the iv is gcd (step, type range).  Since type range is power
4111      of two, it suffices to determine the maximum power of two that divides
4112      step.  */
4113   pow2div = num_ending_zeros (step);
4114   type = unsigned_type_for (TREE_TYPE (step));
4115
4116   period = build_low_bits_mask (type,
4117                                 (TYPE_PRECISION (type)
4118                                  - tree_low_cst (pow2div, 1)));
4119
4120   return period;
4121 }
4122
4123 /* Returns the comparison operator used when eliminating the iv USE.  */
4124
4125 static enum tree_code
4126 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4127 {
4128   struct loop *loop = data->current_loop;
4129   basic_block ex_bb;
4130   edge exit;
4131
4132   ex_bb = bb_for_stmt (use->stmt);
4133   exit = EDGE_SUCC (ex_bb, 0);
4134   if (flow_bb_inside_loop_p (loop, exit->dest))
4135     exit = EDGE_SUCC (ex_bb, 1);
4136
4137   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4138 }
4139
4140 /* Check whether it is possible to express the condition in USE by comparison
4141    of candidate CAND.  If so, store the value compared with to BOUND.  */
4142
4143 static bool
4144 may_eliminate_iv (struct ivopts_data *data,
4145                   struct iv_use *use, struct iv_cand *cand, tree *bound)
4146 {
4147   basic_block ex_bb;
4148   edge exit;
4149   tree nit, nit_type;
4150   tree wider_type, period, per_type;
4151   struct loop *loop = data->current_loop;
4152
4153   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4154     return false;
4155
4156   /* For now works only for exits that dominate the loop latch.  TODO -- extend
4157      for other conditions inside loop body.  */
4158   ex_bb = bb_for_stmt (use->stmt);
4159   if (use->stmt != last_stmt (ex_bb)
4160       || TREE_CODE (use->stmt) != COND_EXPR)
4161     return false;
4162   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4163     return false;
4164
4165   exit = EDGE_SUCC (ex_bb, 0);
4166   if (flow_bb_inside_loop_p (loop, exit->dest))
4167     exit = EDGE_SUCC (ex_bb, 1);
4168   if (flow_bb_inside_loop_p (loop, exit->dest))
4169     return false;
4170
4171   nit = niter_for_exit (data, exit);
4172   if (!nit)
4173     return false;
4174
4175   nit_type = TREE_TYPE (nit);
4176
4177   /* Determine whether we may use the variable to test whether niter iterations
4178      elapsed.  This is the case iff the period of the induction variable is
4179      greater than the number of iterations.  */
4180   period = iv_period (cand->iv);
4181   if (!period)
4182     return false;
4183   per_type = TREE_TYPE (period);
4184
4185   wider_type = TREE_TYPE (period);
4186   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4187     wider_type = per_type;
4188   else
4189     wider_type = nit_type;
4190
4191   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4192                                       fold_convert (wider_type, period),
4193                                       fold_convert (wider_type, nit))))
4194     return false;
4195
4196   *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
4197   return true;
4198 }
4199
4200 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4201
4202 static bool
4203 determine_use_iv_cost_condition (struct ivopts_data *data,
4204                                  struct iv_use *use, struct iv_cand *cand)
4205 {
4206   tree bound = NULL_TREE, op, cond;
4207   bitmap depends_on = NULL;
4208   unsigned cost;
4209
4210   /* Only consider real candidates.  */
4211   if (!cand->iv)
4212     {
4213       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4214       return false;
4215     }
4216
4217   if (may_eliminate_iv (data, use, cand, &bound))
4218     {
4219       cost = force_var_cost (data, bound, &depends_on);
4220
4221       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4222       return cost != INFTY;
4223     }
4224
4225   /* The induction variable elimination failed; just express the original
4226      giv.  If it is compared with an invariant, note that we cannot get
4227      rid of it.  */
4228   cost = get_computation_cost (data, use, cand, false, &depends_on);
4229
4230   cond = *use->op_p;
4231   if (TREE_CODE (cond) != SSA_NAME)
4232     {
4233       op = TREE_OPERAND (cond, 0);
4234       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4235         op = TREE_OPERAND (cond, 1);
4236       if (TREE_CODE (op) == SSA_NAME)
4237         {
4238           op = get_iv (data, op)->base;
4239           fd_ivopts_data = data;
4240           walk_tree (&op, find_depends, &depends_on, NULL);
4241         }
4242     }
4243
4244   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4245   return cost != INFTY;
4246 }
4247
4248 /* Determines cost of basing replacement of USE on CAND.  Returns false
4249    if USE cannot be based on CAND.  */
4250
4251 static bool
4252 determine_use_iv_cost (struct ivopts_data *data,
4253                        struct iv_use *use, struct iv_cand *cand)
4254 {
4255   switch (use->type)
4256     {
4257     case USE_NONLINEAR_EXPR:
4258       return determine_use_iv_cost_generic (data, use, cand);
4259
4260     case USE_ADDRESS:
4261       return determine_use_iv_cost_address (data, use, cand);
4262
4263     case USE_COMPARE:
4264       return determine_use_iv_cost_condition (data, use, cand);
4265
4266     default:
4267       gcc_unreachable ();
4268     }
4269 }
4270
4271 /* Determines costs of basing the use of the iv on an iv candidate.  */
4272
4273 static void
4274 determine_use_iv_costs (struct ivopts_data *data)
4275 {
4276   unsigned i, j;
4277   struct iv_use *use;
4278   struct iv_cand *cand;
4279   bitmap to_clear = BITMAP_ALLOC (NULL);
4280
4281   alloc_use_cost_map (data);
4282
4283   for (i = 0; i < n_iv_uses (data); i++)
4284     {
4285       use = iv_use (data, i);
4286
4287       if (data->consider_all_candidates)
4288         {
4289           for (j = 0; j < n_iv_cands (data); j++)
4290             {
4291               cand = iv_cand (data, j);
4292               determine_use_iv_cost (data, use, cand);
4293             }
4294         }
4295       else
4296         {
4297           bitmap_iterator bi;
4298
4299           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4300             {
4301               cand = iv_cand (data, j);
4302               if (!determine_use_iv_cost (data, use, cand))
4303                 bitmap_set_bit (to_clear, j);
4304             }
4305
4306           /* Remove the candidates for that the cost is infinite from
4307              the list of related candidates.  */
4308           bitmap_and_compl_into (use->related_cands, to_clear);
4309           bitmap_clear (to_clear);
4310         }
4311     }
4312
4313   BITMAP_FREE (to_clear);
4314
4315   if (dump_file && (dump_flags & TDF_DETAILS))
4316     {
4317       fprintf (dump_file, "Use-candidate costs:\n");
4318
4319       for (i = 0; i < n_iv_uses (data); i++)
4320         {
4321           use = iv_use (data, i);
4322
4323           fprintf (dump_file, "Use %d:\n", i);
4324           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4325           for (j = 0; j < use->n_map_members; j++)
4326             {
4327               if (!use->cost_map[j].cand
4328                   || use->cost_map[j].cost == INFTY)
4329                 continue;
4330
4331               fprintf (dump_file, "  %d\t%d\t",
4332                        use->cost_map[j].cand->id,
4333                        use->cost_map[j].cost);
4334               if (use->cost_map[j].depends_on)
4335                 bitmap_print (dump_file,
4336                               use->cost_map[j].depends_on, "","");
4337               fprintf (dump_file, "\n");
4338             }
4339
4340           fprintf (dump_file, "\n");
4341         }
4342       fprintf (dump_file, "\n");
4343     }
4344 }
4345
4346 /* Determines cost of the candidate CAND.  */
4347
4348 static void
4349 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4350 {
4351   unsigned cost_base, cost_step;
4352   tree base;
4353
4354   if (!cand->iv)
4355     {
4356       cand->cost = 0;
4357       return;
4358     }
4359
4360   /* There are two costs associated with the candidate -- its increment
4361      and its initialization.  The second is almost negligible for any loop
4362      that rolls enough, so we take it just very little into account.  */
4363
4364   base = cand->iv->base;
4365   cost_base = force_var_cost (data, base, NULL);
4366   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4367
4368   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4369
4370   /* Prefer the original iv unless we may gain something by replacing it;
4371      this is not really relevant for artificial ivs created by other
4372      passes.  */
4373   if (cand->pos == IP_ORIGINAL
4374       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4375     cand->cost--;
4376
4377   /* Prefer not to insert statements into latch unless there are some
4378      already (so that we do not create unnecessary jumps).  */
4379   if (cand->pos == IP_END
4380       && empty_block_p (ip_end_pos (data->current_loop)))
4381     cand->cost++;
4382 }
4383
4384 /* Determines costs of computation of the candidates.  */
4385
4386 static void
4387 determine_iv_costs (struct ivopts_data *data)
4388 {
4389   unsigned i;
4390
4391   if (dump_file && (dump_flags & TDF_DETAILS))
4392     {
4393       fprintf (dump_file, "Candidate costs:\n");
4394       fprintf (dump_file, "  cand\tcost\n");
4395     }
4396
4397   for (i = 0; i < n_iv_cands (data); i++)
4398     {
4399       struct iv_cand *cand = iv_cand (data, i);
4400
4401       determine_iv_cost (data, cand);
4402
4403       if (dump_file && (dump_flags & TDF_DETAILS))
4404         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4405     }
4406
4407   if (dump_file && (dump_flags & TDF_DETAILS))
4408     fprintf (dump_file, "\n");
4409 }
4410
4411 /* Calculates cost for having SIZE induction variables.  */
4412
4413 static unsigned
4414 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4415 {
4416   return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4417 }
4418
4419 /* For each size of the induction variable set determine the penalty.  */
4420
4421 static void
4422 determine_set_costs (struct ivopts_data *data)
4423 {
4424   unsigned j, n;
4425   tree phi, op;
4426   struct loop *loop = data->current_loop;
4427   bitmap_iterator bi;
4428
4429   /* We use the following model (definitely improvable, especially the
4430      cost function -- TODO):
4431
4432      We estimate the number of registers available (using MD data), name it A.
4433
4434      We estimate the number of registers used by the loop, name it U.  This
4435      number is obtained as the number of loop phi nodes (not counting virtual
4436      registers and bivs) + the number of variables from outside of the loop.
4437
4438      We set a reserve R (free regs that are used for temporary computations,
4439      etc.).  For now the reserve is a constant 3.
4440
4441      Let I be the number of induction variables.
4442
4443      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4444         make a lot of ivs without a reason).
4445      -- if A - R < U + I <= A, the cost is I * PRES_COST
4446      -- if U + I > A, the cost is I * PRES_COST and
4447         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4448
4449   if (dump_file && (dump_flags & TDF_DETAILS))
4450     {
4451       fprintf (dump_file, "Global costs:\n");
4452       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4453       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4454       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4455       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4456     }
4457
4458   n = 0;
4459   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4460     {
4461       op = PHI_RESULT (phi);
4462
4463       if (!is_gimple_reg (op))
4464         continue;
4465
4466       if (get_iv (data, op))
4467         continue;
4468
4469       n++;
4470     }
4471
4472   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4473     {
4474       struct version_info *info = ver_info (data, j);
4475
4476       if (info->inv_id && info->has_nonlin_use)
4477         n++;
4478     }
4479
4480   data->regs_used = n;
4481   if (dump_file && (dump_flags & TDF_DETAILS))
4482     fprintf (dump_file, "  regs_used %d\n", n);
4483
4484   if (dump_file && (dump_flags & TDF_DETAILS))
4485     {
4486       fprintf (dump_file, "  cost for size:\n");
4487       fprintf (dump_file, "  ivs\tcost\n");
4488       for (j = 0; j <= 2 * target_avail_regs; j++)
4489         fprintf (dump_file, "  %d\t%d\n", j,
4490                  ivopts_global_cost_for_size (data, j));
4491       fprintf (dump_file, "\n");
4492     }
4493 }
4494
4495 /* Returns true if A is a cheaper cost pair than B.  */
4496
4497 static bool
4498 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4499 {
4500   if (!a)
4501     return false;
4502
4503   if (!b)
4504     return true;
4505
4506   if (a->cost < b->cost)
4507     return true;
4508
4509   if (a->cost > b->cost)
4510     return false;
4511
4512   /* In case the costs are the same, prefer the cheaper candidate.  */
4513   if (a->cand->cost < b->cand->cost)
4514     return true;
4515
4516   return false;
4517 }
4518
4519 /* Computes the cost field of IVS structure.  */
4520
4521 static void
4522 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4523 {
4524   unsigned cost = 0;
4525
4526   cost += ivs->cand_use_cost;
4527   cost += ivs->cand_cost;
4528   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4529
4530   ivs->cost = cost;
4531 }
4532
4533 /* Remove invariants in set INVS to set IVS.  */
4534
4535 static void
4536 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4537 {
4538   bitmap_iterator bi;
4539   unsigned iid;
4540
4541   if (!invs)
4542     return;
4543
4544   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4545     {
4546       ivs->n_invariant_uses[iid]--;
4547       if (ivs->n_invariant_uses[iid] == 0)
4548         ivs->n_regs--;
4549     }
4550 }
4551
4552 /* Set USE not to be expressed by any candidate in IVS.  */
4553
4554 static void
4555 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4556                  struct iv_use *use)
4557 {
4558   unsigned uid = use->id, cid;
4559   struct cost_pair *cp;
4560
4561   cp = ivs->cand_for_use[uid];
4562   if (!cp)
4563     return;
4564   cid = cp->cand->id;
4565
4566   ivs->bad_uses++;
4567   ivs->cand_for_use[uid] = NULL;
4568   ivs->n_cand_uses[cid]--;
4569
4570   if (ivs->n_cand_uses[cid] == 0)
4571     {
4572       bitmap_clear_bit (ivs->cands, cid);
4573       /* Do not count the pseudocandidates.  */
4574       if (cp->cand->iv)
4575         ivs->n_regs--;
4576       ivs->n_cands--;
4577       ivs->cand_cost -= cp->cand->cost;
4578
4579       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4580     }
4581
4582   ivs->cand_use_cost -= cp->cost;
4583
4584   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4585   iv_ca_recount_cost (data, ivs);
4586 }
4587
4588 /* Add invariants in set INVS to set IVS.  */
4589
4590 static void
4591 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4592 {
4593   bitmap_iterator bi;
4594   unsigned iid;
4595
4596   if (!invs)
4597     return;
4598
4599   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4600     {
4601       ivs->n_invariant_uses[iid]++;
4602       if (ivs->n_invariant_uses[iid] == 1)
4603         ivs->n_regs++;
4604     }
4605 }
4606
4607 /* Set cost pair for USE in set IVS to CP.  */
4608
4609 static void
4610 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4611               struct iv_use *use, struct cost_pair *cp)
4612 {
4613   unsigned uid = use->id, cid;
4614
4615   if (ivs->cand_for_use[uid] == cp)
4616     return;
4617
4618   if (ivs->cand_for_use[uid])
4619     iv_ca_set_no_cp (data, ivs, use);
4620
4621   if (cp)
4622     {
4623       cid = cp->cand->id;
4624
4625       ivs->bad_uses--;
4626       ivs->cand_for_use[uid] = cp;
4627       ivs->n_cand_uses[cid]++;
4628       if (ivs->n_cand_uses[cid] == 1)
4629         {
4630           bitmap_set_bit (ivs->cands, cid);
4631           /* Do not count the pseudocandidates.  */
4632           if (cp->cand->iv)
4633             ivs->n_regs++;
4634           ivs->n_cands++;
4635           ivs->cand_cost += cp->cand->cost;
4636
4637           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4638         }
4639
4640       ivs->cand_use_cost += cp->cost;
4641       iv_ca_set_add_invariants (ivs, cp->depends_on);
4642       iv_ca_recount_cost (data, ivs);
4643     }
4644 }
4645
4646 /* Extend set IVS by expressing USE by some of the candidates in it
4647    if possible.  */
4648
4649 static void
4650 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4651                struct iv_use *use)
4652 {
4653   struct cost_pair *best_cp = NULL, *cp;
4654   bitmap_iterator bi;
4655   unsigned i;
4656
4657   gcc_assert (ivs->upto >= use->id);
4658
4659   if (ivs->upto == use->id)
4660     {
4661       ivs->upto++;
4662       ivs->bad_uses++;
4663     }
4664
4665   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4666     {
4667       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4668
4669       if (cheaper_cost_pair (cp, best_cp))
4670         best_cp = cp;
4671     }
4672
4673   iv_ca_set_cp (data, ivs, use, best_cp);
4674 }
4675
4676 /* Get cost for assignment IVS.  */
4677
4678 static unsigned
4679 iv_ca_cost (struct iv_ca *ivs)
4680 {
4681   return (ivs->bad_uses ? INFTY : ivs->cost);
4682 }
4683
4684 /* Returns true if all dependences of CP are among invariants in IVS.  */
4685
4686 static bool
4687 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4688 {
4689   unsigned i;
4690   bitmap_iterator bi;
4691
4692   if (!cp->depends_on)
4693     return true;
4694
4695   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4696     {
4697       if (ivs->n_invariant_uses[i] == 0)
4698         return false;
4699     }
4700
4701   return true;
4702 }
4703
4704 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4705    it before NEXT_CHANGE.  */
4706
4707 static struct iv_ca_delta *
4708 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4709                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4710 {
4711   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4712
4713   change->use = use;
4714   change->old_cp = old_cp;
4715   change->new_cp = new_cp;
4716   change->next_change = next_change;
4717
4718   return change;
4719 }
4720
4721 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4722    are rewritten.  */
4723
4724 static struct iv_ca_delta *
4725 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4726 {
4727   struct iv_ca_delta *last;
4728
4729   if (!l2)
4730     return l1;
4731
4732   if (!l1)
4733     return l2;
4734
4735   for (last = l1; last->next_change; last = last->next_change)
4736     continue;
4737   last->next_change = l2;
4738
4739   return l1;
4740 }
4741
4742 /* Returns candidate by that USE is expressed in IVS.  */
4743
4744 static struct cost_pair *
4745 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4746 {
4747   return ivs->cand_for_use[use->id];
4748 }
4749
4750 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4751
4752 static struct iv_ca_delta *
4753 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4754 {
4755   struct iv_ca_delta *act, *next, *prev = NULL;
4756   struct cost_pair *tmp;
4757
4758   for (act = delta; act; act = next)
4759     {
4760       next = act->next_change;
4761       act->next_change = prev;
4762       prev = act;
4763
4764       tmp = act->old_cp;
4765       act->old_cp = act->new_cp;
4766       act->new_cp = tmp;
4767     }
4768
4769   return prev;
4770 }
4771
4772 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4773    reverted instead.  */
4774
4775 static void
4776 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4777                     struct iv_ca_delta *delta, bool forward)
4778 {
4779   struct cost_pair *from, *to;
4780   struct iv_ca_delta *act;
4781
4782   if (!forward)
4783     delta = iv_ca_delta_reverse (delta);
4784
4785   for (act = delta; act; act = act->next_change)
4786     {
4787       from = act->old_cp;
4788       to = act->new_cp;
4789       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4790       iv_ca_set_cp (data, ivs, act->use, to);
4791     }
4792
4793   if (!forward)
4794     iv_ca_delta_reverse (delta);
4795 }
4796
4797 /* Returns true if CAND is used in IVS.  */
4798
4799 static bool
4800 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4801 {
4802   return ivs->n_cand_uses[cand->id] > 0;
4803 }
4804
4805 /* Returns number of induction variable candidates in the set IVS.  */
4806
4807 static unsigned
4808 iv_ca_n_cands (struct iv_ca *ivs)
4809 {
4810   return ivs->n_cands;
4811 }
4812
4813 /* Free the list of changes DELTA.  */
4814
4815 static void
4816 iv_ca_delta_free (struct iv_ca_delta **delta)
4817 {
4818   struct iv_ca_delta *act, *next;
4819
4820   for (act = *delta; act; act = next)
4821     {
4822       next = act->next_change;
4823       free (act);
4824     }
4825
4826   *delta = NULL;
4827 }
4828
4829 /* Allocates new iv candidates assignment.  */
4830
4831 static struct iv_ca *
4832 iv_ca_new (struct ivopts_data *data)
4833 {
4834   struct iv_ca *nw = XNEW (struct iv_ca);
4835
4836   nw->upto = 0;
4837   nw->bad_uses = 0;
4838   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4839   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4840   nw->cands = BITMAP_ALLOC (NULL);
4841   nw->n_cands = 0;
4842   nw->n_regs = 0;
4843   nw->cand_use_cost = 0;
4844   nw->cand_cost = 0;
4845   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4846   nw->cost = 0;
4847
4848   return nw;
4849 }
4850
4851 /* Free memory occupied by the set IVS.  */
4852
4853 static void
4854 iv_ca_free (struct iv_ca **ivs)
4855 {
4856   free ((*ivs)->cand_for_use);
4857   free ((*ivs)->n_cand_uses);
4858   BITMAP_FREE ((*ivs)->cands);
4859   free ((*ivs)->n_invariant_uses);
4860   free (*ivs);
4861   *ivs = NULL;
4862 }
4863
4864 /* Dumps IVS to FILE.  */
4865
4866 static void
4867 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4868 {
4869   const char *pref = "  invariants ";
4870   unsigned i;
4871
4872   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4873   bitmap_print (file, ivs->cands, "  candidates ","\n");
4874
4875   for (i = 1; i <= data->max_inv_id; i++)
4876     if (ivs->n_invariant_uses[i])
4877       {
4878         fprintf (file, "%s%d", pref, i);
4879         pref = ", ";
4880       }
4881   fprintf (file, "\n");
4882 }
4883
4884 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4885    new set, and store differences in DELTA.  Number of induction variables
4886    in the new set is stored to N_IVS.  */
4887
4888 static unsigned
4889 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4890               struct iv_cand *cand, struct iv_ca_delta **delta,
4891               unsigned *n_ivs)
4892 {
4893   unsigned i, cost;
4894   struct iv_use *use;
4895   struct cost_pair *old_cp, *new_cp;
4896
4897   *delta = NULL;
4898   for (i = 0; i < ivs->upto; i++)
4899     {
4900       use = iv_use (data, i);
4901       old_cp = iv_ca_cand_for_use (ivs, use);
4902
4903       if (old_cp
4904           && old_cp->cand == cand)
4905         continue;
4906
4907       new_cp = get_use_iv_cost (data, use, cand);
4908       if (!new_cp)
4909         continue;
4910
4911       if (!iv_ca_has_deps (ivs, new_cp))
4912         continue;
4913
4914       if (!cheaper_cost_pair (new_cp, old_cp))
4915         continue;
4916
4917       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4918     }
4919
4920   iv_ca_delta_commit (data, ivs, *delta, true);
4921   cost = iv_ca_cost (ivs);
4922   if (n_ivs)
4923     *n_ivs = iv_ca_n_cands (ivs);
4924   iv_ca_delta_commit (data, ivs, *delta, false);
4925
4926   return cost;
4927 }
4928
4929 /* Try narrowing set IVS by removing CAND.  Return the cost of
4930    the new set and store the differences in DELTA.  */
4931
4932 static unsigned
4933 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4934               struct iv_cand *cand, struct iv_ca_delta **delta)
4935 {
4936   unsigned i, ci;
4937   struct iv_use *use;
4938   struct cost_pair *old_cp, *new_cp, *cp;
4939   bitmap_iterator bi;
4940   struct iv_cand *cnd;
4941   unsigned cost;
4942
4943   *delta = NULL;
4944   for (i = 0; i < n_iv_uses (data); i++)
4945     {
4946       use = iv_use (data, i);
4947
4948       old_cp = iv_ca_cand_for_use (ivs, use);
4949       if (old_cp->cand != cand)
4950         continue;
4951
4952       new_cp = NULL;
4953
4954       if (data->consider_all_candidates)
4955         {
4956           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4957             {
4958               if (ci == cand->id)
4959                 continue;
4960
4961               cnd = iv_cand (data, ci);
4962
4963               cp = get_use_iv_cost (data, use, cnd);
4964               if (!cp)
4965                 continue;
4966               if (!iv_ca_has_deps (ivs, cp))
4967                 continue;
4968
4969               if (!cheaper_cost_pair (cp, new_cp))
4970                 continue;
4971
4972               new_cp = cp;
4973             }
4974         }
4975       else
4976         {
4977           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4978             {
4979               if (ci == cand->id)
4980                 continue;
4981
4982               cnd = iv_cand (data, ci);
4983
4984               cp = get_use_iv_cost (data, use, cnd);
4985               if (!cp)
4986                 continue;
4987               if (!iv_ca_has_deps (ivs, cp))
4988                 continue;
4989
4990               if (!cheaper_cost_pair (cp, new_cp))
4991                 continue;
4992
4993               new_cp = cp;
4994             }
4995         }
4996
4997       if (!new_cp)
4998         {
4999           iv_ca_delta_free (delta);
5000           return INFTY;
5001         }
5002
5003       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5004     }
5005
5006   iv_ca_delta_commit (data, ivs, *delta, true);
5007   cost = iv_ca_cost (ivs);
5008   iv_ca_delta_commit (data, ivs, *delta, false);
5009
5010   return cost;
5011 }
5012
5013 /* Try optimizing the set of candidates IVS by removing candidates different
5014    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5015    differences in DELTA.  */
5016
5017 static unsigned
5018 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5019              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5020 {
5021   bitmap_iterator bi;
5022   struct iv_ca_delta *act_delta, *best_delta;
5023   unsigned i, best_cost, acost;
5024   struct iv_cand *cand;
5025
5026   best_delta = NULL;
5027   best_cost = iv_ca_cost (ivs);
5028
5029   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5030     {
5031       cand = iv_cand (data, i);
5032
5033       if (cand == except_cand)
5034         continue;
5035
5036       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5037
5038       if (acost < best_cost)
5039         {
5040           best_cost = acost;
5041           iv_ca_delta_free (&best_delta);
5042           best_delta = act_delta;
5043         }
5044       else
5045         iv_ca_delta_free (&act_delta);
5046     }
5047
5048   if (!best_delta)
5049     {
5050       *delta = NULL;
5051       return best_cost;
5052     }
5053
5054   /* Recurse to possibly remove other unnecessary ivs.  */
5055   iv_ca_delta_commit (data, ivs, best_delta, true);
5056   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5057   iv_ca_delta_commit (data, ivs, best_delta, false);
5058   *delta = iv_ca_delta_join (best_delta, *delta);
5059   return best_cost;
5060 }
5061
5062 /* Tries to extend the sets IVS in the best possible way in order
5063    to express the USE.  */
5064
5065 static bool
5066 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5067                   struct iv_use *use)
5068 {
5069   unsigned best_cost, act_cost;
5070   unsigned i;
5071   bitmap_iterator bi;
5072   struct iv_cand *cand;
5073   struct iv_ca_delta *best_delta = NULL, *act_delta;
5074   struct cost_pair *cp;
5075
5076   iv_ca_add_use (data, ivs, use);
5077   best_cost = iv_ca_cost (ivs);
5078
5079   cp = iv_ca_cand_for_use (ivs, use);
5080   if (cp)
5081     {
5082       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5083       iv_ca_set_no_cp (data, ivs, use);
5084     }
5085
5086   /* First try important candidates.  Only if it fails, try the specific ones.
5087      Rationale -- in loops with many variables the best choice often is to use
5088      just one generic biv.  If we added here many ivs specific to the uses,
5089      the optimization algorithm later would be likely to get stuck in a local
5090      minimum, thus causing us to create too many ivs.  The approach from
5091      few ivs to more seems more likely to be successful -- starting from few
5092      ivs, replacing an expensive use by a specific iv should always be a
5093      win.  */
5094   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5095     {
5096       cand = iv_cand (data, i);
5097
5098       if (iv_ca_cand_used_p (ivs, cand))
5099         continue;
5100
5101       cp = get_use_iv_cost (data, use, cand);
5102       if (!cp)
5103         continue;
5104
5105       iv_ca_set_cp (data, ivs, use, cp);
5106       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5107       iv_ca_set_no_cp (data, ivs, use);
5108       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5109
5110       if (act_cost < best_cost)
5111         {
5112           best_cost = act_cost;
5113
5114           iv_ca_delta_free (&best_delta);
5115           best_delta = act_delta;
5116         }
5117       else
5118         iv_ca_delta_free (&act_delta);
5119     }
5120
5121   if (best_cost == INFTY)
5122     {
5123       for (i = 0; i < use->n_map_members; i++)
5124         {
5125           cp = use->cost_map + i;
5126           cand = cp->cand;
5127           if (!cand)
5128             continue;
5129
5130           /* Already tried this.  */
5131           if (cand->important)
5132             continue;
5133
5134           if (iv_ca_cand_used_p (ivs, cand))
5135             continue;
5136
5137           act_delta = NULL;
5138           iv_ca_set_cp (data, ivs, use, cp);
5139           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5140           iv_ca_set_no_cp (data, ivs, use);
5141           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5142                                        cp, act_delta);
5143
5144           if (act_cost < best_cost)
5145             {
5146               best_cost = act_cost;
5147
5148               if (best_delta)
5149                 iv_ca_delta_free (&best_delta);
5150               best_delta = act_delta;
5151             }
5152           else
5153             iv_ca_delta_free (&act_delta);
5154         }
5155     }
5156
5157   iv_ca_delta_commit (data, ivs, best_delta, true);
5158   iv_ca_delta_free (&best_delta);
5159
5160   return (best_cost != INFTY);
5161 }
5162
5163 /* Finds an initial assignment of candidates to uses.  */
5164
5165 static struct iv_ca *
5166 get_initial_solution (struct ivopts_data *data)
5167 {
5168   struct iv_ca *ivs = iv_ca_new (data);
5169   unsigned i;
5170
5171   for (i = 0; i < n_iv_uses (data); i++)
5172     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5173       {
5174         iv_ca_free (&ivs);
5175         return NULL;
5176       }
5177
5178   return ivs;
5179 }
5180
5181 /* Tries to improve set of induction variables IVS.  */
5182
5183 static bool
5184 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5185 {
5186   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5187   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5188   struct iv_cand *cand;
5189
5190   /* Try extending the set of induction variables by one.  */
5191   for (i = 0; i < n_iv_cands (data); i++)
5192     {
5193       cand = iv_cand (data, i);
5194
5195       if (iv_ca_cand_used_p (ivs, cand))
5196         continue;
5197
5198       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5199       if (!act_delta)
5200         continue;
5201
5202       /* If we successfully added the candidate and the set is small enough,
5203          try optimizing it by removing other candidates.  */
5204       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5205         {
5206           iv_ca_delta_commit (data, ivs, act_delta, true);
5207           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5208           iv_ca_delta_commit (data, ivs, act_delta, false);
5209           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5210         }
5211
5212       if (acost < best_cost)
5213         {
5214           best_cost = acost;
5215           iv_ca_delta_free (&best_delta);
5216           best_delta = act_delta;
5217         }
5218       else
5219         iv_ca_delta_free (&act_delta);
5220     }
5221
5222   if (!best_delta)
5223     {
5224       /* Try removing the candidates from the set instead.  */
5225       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5226
5227       /* Nothing more we can do.  */
5228       if (!best_delta)
5229         return false;
5230     }
5231
5232   iv_ca_delta_commit (data, ivs, best_delta, true);
5233   gcc_assert (best_cost == iv_ca_cost (ivs));
5234   iv_ca_delta_free (&best_delta);
5235   return true;
5236 }
5237
5238 /* Attempts to find the optimal set of induction variables.  We do simple
5239    greedy heuristic -- we try to replace at most one candidate in the selected
5240    solution and remove the unused ivs while this improves the cost.  */
5241
5242 static struct iv_ca *
5243 find_optimal_iv_set (struct ivopts_data *data)
5244 {
5245   unsigned i;
5246   struct iv_ca *set;
5247   struct iv_use *use;
5248
5249   /* Get the initial solution.  */
5250   set = get_initial_solution (data);
5251   if (!set)
5252     {
5253       if (dump_file && (dump_flags & TDF_DETAILS))
5254         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5255       return NULL;
5256     }
5257
5258   if (dump_file && (dump_flags & TDF_DETAILS))
5259     {
5260       fprintf (dump_file, "Initial set of candidates:\n");
5261       iv_ca_dump (data, dump_file, set);
5262     }
5263
5264   while (try_improve_iv_set (data, set))
5265     {
5266       if (dump_file && (dump_flags & TDF_DETAILS))
5267         {
5268           fprintf (dump_file, "Improved to:\n");
5269           iv_ca_dump (data, dump_file, set);
5270         }
5271     }
5272
5273   if (dump_file && (dump_flags & TDF_DETAILS))
5274     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5275
5276   for (i = 0; i < n_iv_uses (data); i++)
5277     {
5278       use = iv_use (data, i);
5279       use->selected = iv_ca_cand_for_use (set, use)->cand;
5280     }
5281
5282   return set;
5283 }
5284
5285 /* Creates a new induction variable corresponding to CAND.  */
5286
5287 static void
5288 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5289 {
5290   block_stmt_iterator incr_pos;
5291   tree base;
5292   bool after = false;
5293
5294   if (!cand->iv)
5295     return;
5296
5297   switch (cand->pos)
5298     {
5299     case IP_NORMAL:
5300       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5301       break;
5302
5303     case IP_END:
5304       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5305       after = true;
5306       break;
5307
5308     case IP_ORIGINAL:
5309       /* Mark that the iv is preserved.  */
5310       name_info (data, cand->var_before)->preserve_biv = true;
5311       name_info (data, cand->var_after)->preserve_biv = true;
5312
5313       /* Rewrite the increment so that it uses var_before directly.  */
5314       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5315
5316       return;
5317     }
5318
5319   gimple_add_tmp_var (cand->var_before);
5320   add_referenced_var (cand->var_before);
5321
5322   base = unshare_expr (cand->iv->base);
5323
5324   create_iv (base, unshare_expr (cand->iv->step),
5325              cand->var_before, data->current_loop,
5326              &incr_pos, after, &cand->var_before, &cand->var_after);
5327 }
5328
5329 /* Creates new induction variables described in SET.  */
5330
5331 static void
5332 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5333 {
5334   unsigned i;
5335   struct iv_cand *cand;
5336   bitmap_iterator bi;
5337
5338   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5339     {
5340       cand = iv_cand (data, i);
5341       create_new_iv (data, cand);
5342     }
5343 }
5344
5345 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5346    is true, remove also the ssa name defined by the statement.  */
5347
5348 static void
5349 remove_statement (tree stmt, bool including_defined_name)
5350 {
5351   if (TREE_CODE (stmt) == PHI_NODE)
5352     {
5353       if (!including_defined_name)
5354         {
5355           /* Prevent the ssa name defined by the statement from being removed.  */
5356           SET_PHI_RESULT (stmt, NULL);
5357         }
5358       remove_phi_node (stmt, NULL_TREE);
5359     }
5360   else
5361     {
5362       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5363
5364       bsi_remove (&bsi, true);
5365     }
5366 }
5367
5368 /* Rewrites USE (definition of iv used in a nonlinear expression)
5369    using candidate CAND.  */
5370
5371 static void
5372 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5373                             struct iv_use *use, struct iv_cand *cand)
5374 {
5375   tree comp;
5376   tree op, stmts, tgt, ass;
5377   block_stmt_iterator bsi, pbsi;
5378
5379   /* An important special case -- if we are asked to express value of
5380      the original iv by itself, just exit; there is no need to
5381      introduce a new computation (that might also need casting the
5382      variable to unsigned and back).  */
5383   if (cand->pos == IP_ORIGINAL
5384       && cand->incremented_at == use->stmt)
5385     {
5386       tree step, ctype, utype;
5387       enum tree_code incr_code = PLUS_EXPR;
5388
5389       gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5390       gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5391
5392       step = cand->iv->step;
5393       ctype = TREE_TYPE (step);
5394       utype = TREE_TYPE (cand->var_after);
5395       if (TREE_CODE (step) == NEGATE_EXPR)
5396         {
5397           incr_code = MINUS_EXPR;
5398           step = TREE_OPERAND (step, 0);
5399         }
5400
5401       /* Check whether we may leave the computation unchanged.
5402          This is the case only if it does not rely on other
5403          computations in the loop -- otherwise, the computation
5404          we rely upon may be removed in remove_unused_ivs,
5405          thus leading to ICE.  */
5406       op = TREE_OPERAND (use->stmt, 1);
5407       if (TREE_CODE (op) == PLUS_EXPR
5408           || TREE_CODE (op) == MINUS_EXPR)
5409         {
5410           if (TREE_OPERAND (op, 0) == cand->var_before)
5411             op = TREE_OPERAND (op, 1);
5412           else if (TREE_CODE (op) == PLUS_EXPR
5413                    && TREE_OPERAND (op, 1) == cand->var_before)
5414             op = TREE_OPERAND (op, 0);
5415           else
5416             op = NULL_TREE;
5417         }
5418       else
5419         op = NULL_TREE;
5420
5421       if (op
5422           && (TREE_CODE (op) == INTEGER_CST
5423               || operand_equal_p (op, step, 0)))
5424         return;
5425
5426       /* Otherwise, add the necessary computations to express
5427          the iv.  */
5428       op = fold_convert (ctype, cand->var_before);
5429       comp = fold_convert (utype,
5430                            build2 (incr_code, ctype, op,
5431                                    unshare_expr (step)));
5432     }
5433   else
5434     comp = get_computation (data->current_loop, use, cand);
5435
5436   switch (TREE_CODE (use->stmt))
5437     {
5438     case PHI_NODE:
5439       tgt = PHI_RESULT (use->stmt);
5440
5441       /* If we should keep the biv, do not replace it.  */
5442       if (name_info (data, tgt)->preserve_biv)
5443         return;
5444
5445       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5446       while (!bsi_end_p (pbsi)
5447              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5448         {
5449           bsi = pbsi;
5450           bsi_next (&pbsi);
5451         }
5452       break;
5453
5454     case MODIFY_EXPR:
5455       tgt = TREE_OPERAND (use->stmt, 0);
5456       bsi = bsi_for_stmt (use->stmt);
5457       break;
5458
5459     default:
5460       gcc_unreachable ();
5461     }
5462
5463   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5464
5465   if (TREE_CODE (use->stmt) == PHI_NODE)
5466     {
5467       if (stmts)
5468         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5469       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5470       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5471       remove_statement (use->stmt, false);
5472       SSA_NAME_DEF_STMT (tgt) = ass;
5473     }
5474   else
5475     {
5476       if (stmts)
5477         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5478       TREE_OPERAND (use->stmt, 1) = op;
5479     }
5480 }
5481
5482 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5483    for_each_index.  */
5484
5485 static bool
5486 idx_remove_ssa_names (tree base, tree *idx,
5487                       void *data ATTRIBUTE_UNUSED)
5488 {
5489   tree *op;
5490
5491   if (TREE_CODE (*idx) == SSA_NAME)
5492     *idx = SSA_NAME_VAR (*idx);
5493
5494   if (TREE_CODE (base) == ARRAY_REF)
5495     {
5496       op = &TREE_OPERAND (base, 2);
5497       if (*op
5498           && TREE_CODE (*op) == SSA_NAME)
5499         *op = SSA_NAME_VAR (*op);
5500       op = &TREE_OPERAND (base, 3);
5501       if (*op
5502           && TREE_CODE (*op) == SSA_NAME)
5503         *op = SSA_NAME_VAR (*op);
5504     }
5505
5506   return true;
5507 }
5508
5509 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5510
5511 static tree
5512 unshare_and_remove_ssa_names (tree ref)
5513 {
5514   ref = unshare_expr (ref);
5515   for_each_index (&ref, idx_remove_ssa_names, NULL);
5516
5517   return ref;
5518 }
5519
5520 /* Extract the alias analysis info for the memory reference REF.  There are
5521    several ways how this information may be stored and what precisely is
5522    its semantics depending on the type of the reference, but there always is
5523    somewhere hidden one _DECL node that is used to determine the set of
5524    virtual operands for the reference.  The code below deciphers this jungle
5525    and extracts this single useful piece of information.  */
5526
5527 static tree
5528 get_ref_tag (tree ref, tree orig)
5529 {
5530   tree var = get_base_address (ref);
5531   tree aref = NULL_TREE, tag, sv;
5532   HOST_WIDE_INT offset, size, maxsize;
5533
5534   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5535     {
5536       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5537       if (ref)
5538         break;
5539     }
5540
5541   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5542     return unshare_expr (sv);
5543
5544   if (!var)
5545     return NULL_TREE;
5546
5547   if (TREE_CODE (var) == INDIRECT_REF)
5548     {
5549       /* If the base is a dereference of a pointer, first check its name memory
5550          tag.  If it does not have one, use its symbol memory tag.  */
5551       var = TREE_OPERAND (var, 0);
5552       if (TREE_CODE (var) != SSA_NAME)
5553         return NULL_TREE;
5554
5555       if (SSA_NAME_PTR_INFO (var))
5556         {
5557           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5558           if (tag)
5559             return tag;
5560         }
5561
5562       var = SSA_NAME_VAR (var);
5563       tag = var_ann (var)->symbol_mem_tag;
5564       gcc_assert (tag != NULL_TREE);
5565       return tag;
5566     }
5567   else
5568     {
5569       if (!DECL_P (var))
5570         return NULL_TREE;
5571
5572       tag = var_ann (var)->symbol_mem_tag;
5573       if (tag)
5574         return tag;
5575
5576       return var;
5577     }
5578 }
5579
5580 /* Copies the reference information from OLD_REF to NEW_REF.  */
5581
5582 static void
5583 copy_ref_info (tree new_ref, tree old_ref)
5584 {
5585   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5586     copy_mem_ref_info (new_ref, old_ref);
5587   else
5588     {
5589       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5590       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5591     }
5592 }
5593
5594 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5595
5596 static void
5597 rewrite_use_address (struct ivopts_data *data,
5598                      struct iv_use *use, struct iv_cand *cand)
5599 {
5600   struct affine_tree_combination aff;
5601   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5602   tree ref;
5603
5604   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5605   unshare_aff_combination (&aff);
5606
5607   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5608   copy_ref_info (ref, *use->op_p);
5609   *use->op_p = ref;
5610 }
5611
5612 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5613    candidate CAND.  */
5614
5615 static void
5616 rewrite_use_compare (struct ivopts_data *data,
5617                      struct iv_use *use, struct iv_cand *cand)
5618 {
5619   tree comp;
5620   tree *op_p, cond, op, stmts, bound;
5621   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5622   enum tree_code compare;
5623   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5624
5625   bound = cp->value;
5626   if (bound)
5627     {
5628       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5629       tree var_type = TREE_TYPE (var);
5630
5631       compare = iv_elimination_compare (data, use);
5632       bound = fold_convert (var_type, bound);
5633       op = force_gimple_operand (unshare_expr (bound), &stmts,
5634                                  true, NULL_TREE);
5635
5636       if (stmts)
5637         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5638
5639       *use->op_p = build2 (compare, boolean_type_node, var, op);
5640       update_stmt (use->stmt);
5641       return;
5642     }
5643
5644   /* The induction variable elimination failed; just express the original
5645      giv.  */
5646   comp = get_computation (data->current_loop, use, cand);
5647
5648   cond = *use->op_p;
5649   op_p = &TREE_OPERAND (cond, 0);
5650   if (TREE_CODE (*op_p) != SSA_NAME
5651       || zero_p (get_iv (data, *op_p)->step))
5652     op_p = &TREE_OPERAND (cond, 1);
5653
5654   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5655   if (stmts)
5656     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5657
5658   *op_p = op;
5659 }
5660
5661 /* Rewrites USE using candidate CAND.  */
5662
5663 static void
5664 rewrite_use (struct ivopts_data *data,
5665              struct iv_use *use, struct iv_cand *cand)
5666 {
5667   switch (use->type)
5668     {
5669       case USE_NONLINEAR_EXPR:
5670         rewrite_use_nonlinear_expr (data, use, cand);
5671         break;
5672
5673       case USE_ADDRESS:
5674         rewrite_use_address (data, use, cand);
5675         break;
5676
5677       case USE_COMPARE:
5678         rewrite_use_compare (data, use, cand);
5679         break;
5680
5681       default:
5682         gcc_unreachable ();
5683     }
5684   mark_new_vars_to_rename (use->stmt);
5685 }
5686
5687 /* Rewrite the uses using the selected induction variables.  */
5688
5689 static void
5690 rewrite_uses (struct ivopts_data *data)
5691 {
5692   unsigned i;
5693   struct iv_cand *cand;
5694   struct iv_use *use;
5695
5696   for (i = 0; i < n_iv_uses (data); i++)
5697     {
5698       use = iv_use (data, i);
5699       cand = use->selected;
5700       gcc_assert (cand);
5701
5702       rewrite_use (data, use, cand);
5703     }
5704 }
5705
5706 /* Removes the ivs that are not used after rewriting.  */
5707
5708 static void
5709 remove_unused_ivs (struct ivopts_data *data)
5710 {
5711   unsigned j;
5712   bitmap_iterator bi;
5713
5714   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5715     {
5716       struct version_info *info;
5717
5718       info = ver_info (data, j);
5719       if (info->iv
5720           && !zero_p (info->iv->step)
5721           && !info->inv_id
5722           && !info->iv->have_use_for
5723           && !info->preserve_biv)
5724         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5725     }
5726 }
5727
5728 /* Frees data allocated by the optimization of a single loop.  */
5729
5730 static void
5731 free_loop_data (struct ivopts_data *data)
5732 {
5733   unsigned i, j;
5734   bitmap_iterator bi;
5735   tree obj;
5736
5737   htab_empty (data->niters);
5738
5739   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5740     {
5741       struct version_info *info;
5742
5743       info = ver_info (data, i);
5744       if (info->iv)
5745         free (info->iv);
5746       info->iv = NULL;
5747       info->has_nonlin_use = false;
5748       info->preserve_biv = false;
5749       info->inv_id = 0;
5750     }
5751   bitmap_clear (data->relevant);
5752   bitmap_clear (data->important_candidates);
5753
5754   for (i = 0; i < n_iv_uses (data); i++)
5755     {
5756       struct iv_use *use = iv_use (data, i);
5757
5758       free (use->iv);
5759       BITMAP_FREE (use->related_cands);
5760       for (j = 0; j < use->n_map_members; j++)
5761         if (use->cost_map[j].depends_on)
5762           BITMAP_FREE (use->cost_map[j].depends_on);
5763       free (use->cost_map);
5764       free (use);
5765     }
5766   VEC_truncate (iv_use_p, data->iv_uses, 0);
5767
5768   for (i = 0; i < n_iv_cands (data); i++)
5769     {
5770       struct iv_cand *cand = iv_cand (data, i);
5771
5772       if (cand->iv)
5773         free (cand->iv);
5774       if (cand->depends_on)
5775         BITMAP_FREE (cand->depends_on);
5776       free (cand);
5777     }
5778   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5779
5780   if (data->version_info_size < num_ssa_names)
5781     {
5782       data->version_info_size = 2 * num_ssa_names;
5783       free (data->version_info);
5784       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5785     }
5786
5787   data->max_inv_id = 0;
5788
5789   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5790     SET_DECL_RTL (obj, NULL_RTX);
5791
5792   VEC_truncate (tree, decl_rtl_to_reset, 0);
5793 }
5794
5795 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5796    loop tree.  */
5797
5798 static void
5799 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5800 {
5801   free_loop_data (data);
5802   free (data->version_info);
5803   BITMAP_FREE (data->relevant);
5804   BITMAP_FREE (data->important_candidates);
5805   htab_delete (data->niters);
5806
5807   VEC_free (tree, heap, decl_rtl_to_reset);
5808   VEC_free (iv_use_p, heap, data->iv_uses);
5809   VEC_free (iv_cand_p, heap, data->iv_candidates);
5810 }
5811
5812 /* Optimizes the LOOP.  Returns true if anything changed.  */
5813
5814 static bool
5815 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5816 {
5817   bool changed = false;
5818   struct iv_ca *iv_ca;
5819   edge exit;
5820
5821   data->current_loop = loop;
5822
5823   if (dump_file && (dump_flags & TDF_DETAILS))
5824     {
5825       fprintf (dump_file, "Processing loop %d\n", loop->num);
5826
5827       exit = single_dom_exit (loop);
5828       if (exit)
5829         {
5830           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5831                    exit->src->index, exit->dest->index);
5832           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5833           fprintf (dump_file, "\n");
5834         }
5835
5836       fprintf (dump_file, "\n");
5837     }
5838
5839   /* For each ssa name determines whether it behaves as an induction variable
5840      in some loop.  */
5841   if (!find_induction_variables (data))
5842     goto finish;
5843
5844   /* Finds interesting uses (item 1).  */
5845   find_interesting_uses (data);
5846   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5847     goto finish;
5848
5849   /* Finds candidates for the induction variables (item 2).  */
5850   find_iv_candidates (data);
5851
5852   /* Calculates the costs (item 3, part 1).  */
5853   determine_use_iv_costs (data);
5854   determine_iv_costs (data);
5855   determine_set_costs (data);
5856
5857   /* Find the optimal set of induction variables (item 3, part 2).  */
5858   iv_ca = find_optimal_iv_set (data);
5859   if (!iv_ca)
5860     goto finish;
5861   changed = true;
5862
5863   /* Create the new induction variables (item 4, part 1).  */
5864   create_new_ivs (data, iv_ca);
5865   iv_ca_free (&iv_ca);
5866
5867   /* Rewrite the uses (item 4, part 2).  */
5868   rewrite_uses (data);
5869
5870   /* Remove the ivs that are unused after rewriting.  */
5871   remove_unused_ivs (data);
5872
5873   /* We have changed the structure of induction variables; it might happen
5874      that definitions in the scev database refer to some of them that were
5875      eliminated.  */
5876   scev_reset ();
5877
5878 finish:
5879   free_loop_data (data);
5880
5881   return changed;
5882 }
5883
5884 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5885
5886 void
5887 tree_ssa_iv_optimize (struct loops *loops)
5888 {
5889   struct loop *loop;
5890   struct ivopts_data data;
5891
5892   tree_ssa_iv_optimize_init (&data);
5893
5894   /* Optimize the loops starting with the innermost ones.  */
5895   loop = loops->tree_root;
5896   while (loop->inner)
5897     loop = loop->inner;
5898
5899   /* Scan the loops, inner ones first.  */
5900   while (loop != loops->tree_root)
5901     {
5902       if (dump_file && (dump_flags & TDF_DETAILS))
5903         flow_loop_dump (loop, dump_file, NULL, 1);
5904
5905       tree_ssa_iv_optimize_loop (&data, loop);
5906
5907       if (loop->next)
5908         {
5909           loop = loop->next;
5910           while (loop->inner)
5911             loop = loop->inner;
5912         }
5913       else
5914         loop = loop->outer;
5915     }
5916
5917   tree_ssa_iv_optimize_finalize (&data);
5918 }