/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
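
/* As a small illustration (not part of the original description), a loop
   such as

       for (i = 0; i < n; i++)
         a[i] = 0;

   may end up using a pointer induction variable instead of the index:

       for (p = &a[0]; p != &a[n]; p++)
         *p = 0;

   Here the multiplication hidden in the address computation &a[i] is
   replaced by a simple pointer increment (strength reduction), and the
   counter i can be removed if it has no other uses (induction variable
   elimination).  */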
67 #include "coretypes.h"
71 #include "basic-block.h"
73 #include "diagnostic.h"
74 #include "tree-pretty-print.h"
75 #include "gimple-pretty-print.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
81 #include "tree-pass.h"
83 #include "insn-config.h"
85 #include "pointer-set.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
95 /* The infinite cost. */
96 #define INFTY 10000000
/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
100 #define AVG_LOOP_NITER(LOOP) 5
103 /* Representation of the induction variable. */
106 tree base; /* Initial value of the iv. */
  tree base_object;   /* A memory object to which the induction variable
                         points.  */
108 tree step; /* Step of the iv (constant only). */
109 tree ssa_name; /* The ssa name with the value. */
110 bool biv_p; /* Is it a biv? */
111 bool have_use_for; /* Do we already have a use for it? */
112 unsigned use_id; /* The identifier in the use if it is the case. */
115 /* Per-ssa version information (induction variable descriptions, etc.). */
118 tree name; /* The ssa name. */
119 struct iv *iv; /* Induction variable description. */
120 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
121 an expression that is not an induction variable. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
123 unsigned inv_id; /* Id of an invariant. */
129 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
130 USE_ADDRESS, /* Use in an address. */
131 USE_COMPARE /* Use is a compare. */
134 /* Cost of a computation. */
137 int cost; /* The runtime cost. */
138 unsigned complexity; /* The estimate of the complexity of the code for
139 the computation (in no concrete units --
140 complexity field should be larger for more
141 complex expressions and addressing modes). */
144 static const comp_cost zero_cost = {0, 0};
145 static const comp_cost infinite_cost = {INFTY, INFTY};
147 /* The candidate - cost pair. */
150 struct iv_cand *cand; /* The candidate. */
151 comp_cost cost; /* The cost. */
  bitmap depends_on;  /* The list of invariants that have to be
                         preserved.  */
154 tree value; /* For final value elimination, the expression for
155 the final value of the iv. For iv elimination,
156 the new bound to compare with. */
162 unsigned id; /* The id of the use. */
163 enum use_type type; /* Type of the use. */
164 struct iv *iv; /* The induction variable it is based on. */
165 gimple stmt; /* Statement in that it occurs. */
166 tree *op_p; /* The place where it occurs. */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */
170 unsigned n_map_members; /* Number of candidates in the cost_map list. */
171 struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */
174 struct iv_cand *selected;
175 /* The selected candidate. */
178 /* The position where the iv is computed. */
181 IP_NORMAL, /* At the end, just before the exit condition. */
182 IP_END, /* At the end of the latch block. */
183 IP_BEFORE_USE, /* Immediately before a specific use. */
184 IP_AFTER_USE, /* Immediately after a specific use. */
185 IP_ORIGINAL /* The original biv. */
188 /* The induction variable candidate. */
191 unsigned id; /* The number of the candidate. */
192 bool important; /* Whether this is an "important" candidate, i.e. such
193 that it should be considered by all uses. */
194 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
197 tree var_before; /* The variable used for it before increment. */
198 tree var_after; /* The variable used for it after increment. */
199 struct iv *iv; /* The value of the candidate. NULL for
200 "pseudocandidate" used to indicate the possibility
201 to replace the final value of an iv by direct
202 computation of the value. */
203 unsigned cost; /* Cost of the candidate. */
204 unsigned cost_step; /* Cost of the candidate's increment operation. */
205 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
206 where it is incremented. */
  bitmap depends_on;  /* The list of invariants that are used in step of the
                         biv.  */
211 /* The data used by the induction variable optimizations. */
typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);
217 typedef struct iv_cand *iv_cand_p;
218 DEF_VEC_P(iv_cand_p);
219 DEF_VEC_ALLOC_P(iv_cand_p,heap);
223 /* The currently optimized loop. */
224 struct loop *current_loop;
226 /* Numbers of iterations for all exits of the current loop. */
227 struct pointer_map_t *niters;
229 /* Number of registers used in it. */
232 /* The size of version_info array allocated. */
233 unsigned version_info_size;
235 /* The array of information for the ssa names. */
236 struct version_info *version_info;
238 /* The bitmap of indices in version_info whose value was changed. */
241 /* The uses of induction variables. */
242 VEC(iv_use_p,heap) *iv_uses;
244 /* The candidates. */
245 VEC(iv_cand_p,heap) *iv_candidates;
247 /* A bitmap of important candidates. */
248 bitmap important_candidates;
250 /* The maximum invariant id. */
  /* Whether to consider just related and important candidates when replacing a
     use.  */
255 bool consider_all_candidates;
257 /* Are we optimizing for speed? */
261 /* An assignment of iv candidates to uses. */
265 /* The number of uses covered by the assignment. */
268 /* Number of uses that cannot be expressed by the candidates in the set. */
271 /* Candidate assigned to a use, together with the related costs. */
272 struct cost_pair **cand_for_use;
274 /* Number of times each candidate is used. */
275 unsigned *n_cand_uses;
277 /* The candidates used. */
280 /* The number of candidates in the set. */
283 /* Total number of registers needed. */
286 /* Total cost of expressing uses. */
287 comp_cost cand_use_cost;
289 /* Total cost of candidates. */
292 /* Number of times each invariant is used. */
293 unsigned *n_invariant_uses;
295 /* Total cost of the assignment. */
299 /* Difference of two iv candidate assignments. */
306 /* An old assignment (for rollback purposes). */
307 struct cost_pair *old_cp;
309 /* A new assignment. */
310 struct cost_pair *new_cp;
312 /* Next change in the list. */
313 struct iv_ca_delta *next_change;
/* Bound on number of candidates below which all candidates are considered
   in each use.  */
318 #define CONSIDER_ALL_CANDIDATES_BOUND \
319 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
321 /* If there are more iv occurrences, we just give up (it is quite unlikely that
322 optimizing such a loop would help, and it would take ages). */
324 #define MAX_CONSIDERED_USES \
325 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */
330 #define ALWAYS_PRUNE_CAND_SET_BOUND \
331 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */
336 static VEC(tree,heap) *decl_rtl_to_reset;
338 /* Number of uses recorded in DATA. */
340 static inline unsigned
341 n_iv_uses (struct ivopts_data *data)
343 return VEC_length (iv_use_p, data->iv_uses);
346 /* Ith use recorded in DATA. */
348 static inline struct iv_use *
349 iv_use (struct ivopts_data *data, unsigned i)
351 return VEC_index (iv_use_p, data->iv_uses, i);
354 /* Number of candidates recorded in DATA. */
356 static inline unsigned
357 n_iv_cands (struct ivopts_data *data)
359 return VEC_length (iv_cand_p, data->iv_candidates);
362 /* Ith candidate recorded in DATA. */
364 static inline struct iv_cand *
365 iv_cand (struct ivopts_data *data, unsigned i)
367 return VEC_index (iv_cand_p, data->iv_candidates, i);
370 /* The single loop exit if it dominates the latch, NULL otherwise. */
373 single_dom_exit (struct loop *loop)
375 edge exit = single_exit (loop);
380 if (!just_once_each_iteration_p (loop, exit->src))
386 /* Dumps information about the induction variable IV to FILE. */
388 extern void dump_iv (FILE *, struct iv *);
390 dump_iv (FILE *file, struct iv *iv)
394 fprintf (file, "ssa name ");
395 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
396 fprintf (file, "\n");
399 fprintf (file, " type ");
400 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
401 fprintf (file, "\n");
405 fprintf (file, " base ");
406 print_generic_expr (file, iv->base, TDF_SLIM);
407 fprintf (file, "\n");
409 fprintf (file, " step ");
410 print_generic_expr (file, iv->step, TDF_SLIM);
411 fprintf (file, "\n");
415 fprintf (file, " invariant ");
416 print_generic_expr (file, iv->base, TDF_SLIM);
417 fprintf (file, "\n");
422 fprintf (file, " base object ");
423 print_generic_expr (file, iv->base_object, TDF_SLIM);
424 fprintf (file, "\n");
428 fprintf (file, " is a biv\n");
431 /* Dumps information about the USE to FILE. */
433 extern void dump_use (FILE *, struct iv_use *);
435 dump_use (FILE *file, struct iv_use *use)
437 fprintf (file, "use %d\n", use->id);
441 case USE_NONLINEAR_EXPR:
442 fprintf (file, " generic\n");
446 fprintf (file, " address\n");
450 fprintf (file, " compare\n");
457 fprintf (file, " in statement ");
458 print_gimple_stmt (file, use->stmt, 0, 0);
459 fprintf (file, "\n");
461 fprintf (file, " at position ");
463 print_generic_expr (file, *use->op_p, TDF_SLIM);
464 fprintf (file, "\n");
466 dump_iv (file, use->iv);
468 if (use->related_cands)
470 fprintf (file, " related candidates ");
471 dump_bitmap (file, use->related_cands);
475 /* Dumps information about the uses to FILE. */
477 extern void dump_uses (FILE *, struct ivopts_data *);
479 dump_uses (FILE *file, struct ivopts_data *data)
484 for (i = 0; i < n_iv_uses (data); i++)
486 use = iv_use (data, i);
488 dump_use (file, use);
489 fprintf (file, "\n");
493 /* Dumps information about induction variable candidate CAND to FILE. */
495 extern void dump_cand (FILE *, struct iv_cand *);
497 dump_cand (FILE *file, struct iv_cand *cand)
499 struct iv *iv = cand->iv;
501 fprintf (file, "candidate %d%s\n",
502 cand->id, cand->important ? " (important)" : "");
504 if (cand->depends_on)
506 fprintf (file, " depends on ");
507 dump_bitmap (file, cand->depends_on);
512 fprintf (file, " final value replacement\n");
519 fprintf (file, " incremented before exit test\n");
523 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
527 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
531 fprintf (file, " incremented at end\n");
535 fprintf (file, " original biv\n");
542 /* Returns the info for ssa version VER. */
544 static inline struct version_info *
545 ver_info (struct ivopts_data *data, unsigned ver)
547 return data->version_info + ver;
550 /* Returns the info for ssa name NAME. */
552 static inline struct version_info *
553 name_info (struct ivopts_data *data, tree name)
555 return ver_info (data, SSA_NAME_VERSION (name));
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */
562 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
564 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
568 if (sbb == loop->latch)
574 return stmt == last_stmt (bb);
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */
582 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
584 basic_block cand_bb = gimple_bb (cand->incremented_at);
585 basic_block stmt_bb = gimple_bb (stmt);
587 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
590 if (stmt_bb != cand_bb)
594 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
596 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */
603 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
611 return stmt_after_ip_normal_pos (loop, stmt);
615 return stmt_after_inc_pos (cand, stmt, false);
618 return stmt_after_inc_pos (cand, stmt, true);
625 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
628 abnormal_ssa_name_p (tree exp)
633 if (TREE_CODE (exp) != SSA_NAME)
636 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
639 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
640 abnormal phi node. Callback for for_each_index. */
643 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
644 void *data ATTRIBUTE_UNUSED)
646 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
648 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
650 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
654 return !abnormal_ssa_name_p (*index);
657 /* Returns true if EXPR contains a ssa name that occurs in an
658 abnormal phi node. */
661 contains_abnormal_ssa_name_p (tree expr)
664 enum tree_code_class codeclass;
669 code = TREE_CODE (expr);
670 codeclass = TREE_CODE_CLASS (code);
672 if (code == SSA_NAME)
673 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
675 if (code == INTEGER_CST
676 || is_gimple_min_invariant (expr))
679 if (code == ADDR_EXPR)
680 return !for_each_index (&TREE_OPERAND (expr, 0),
681 idx_contains_abnormal_ssa_name_p,
684 if (code == COND_EXPR)
685 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
686 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
687 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
693 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
698 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
710 /* Returns tree describing number of iterations determined from
711 EXIT of DATA->current_loop, or NULL if something goes wrong. */
714 niter_for_exit (struct ivopts_data *data, edge exit)
716 struct tree_niter_desc desc;
722 data->niters = pointer_map_create ();
726 slot = pointer_map_contains (data->niters, exit);
730 /* Try to determine number of iterations. We must know it
731 unconditionally (i.e., without possibility of # of iterations
732 being zero). Also, we cannot safely work with ssa names that
733 appear in phi nodes on abnormal edges, so that we do not create
734 overlapping life ranges for them (PR 27283). */
735 if (number_of_iterations_exit (data->current_loop,
737 && integer_zerop (desc.may_be_zero)
738 && !contains_abnormal_ssa_name_p (desc.niter))
743 *pointer_map_insert (data->niters, exit) = niter;
746 niter = (tree) *slot;
/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */
756 niter_for_single_dom_exit (struct ivopts_data *data)
758 edge exit = single_dom_exit (data->current_loop);
763 return niter_for_exit (data, exit);
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */
770 tree_ssa_iv_optimize_init (struct ivopts_data *data)
772 data->version_info_size = 2 * num_ssa_names;
773 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
774 data->relevant = BITMAP_ALLOC (NULL);
775 data->important_candidates = BITMAP_ALLOC (NULL);
776 data->max_inv_id = 0;
778 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
779 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
780 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */
787 determine_base_object (tree expr)
789 enum tree_code code = TREE_CODE (expr);
792 /* If this is a pointer casted to any type, we need to determine
793 the base object for the pointer; so handle conversions before
794 throwing away non-pointer expressions. */
795 if (CONVERT_EXPR_P (expr))
796 return determine_base_object (TREE_OPERAND (expr, 0));
798 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
807 obj = TREE_OPERAND (expr, 0);
808 base = get_base_address (obj);
813 if (TREE_CODE (base) == INDIRECT_REF)
814 return determine_base_object (TREE_OPERAND (base, 0));
816 return fold_convert (ptr_type_node,
817 build_fold_addr_expr (base));
819 case POINTER_PLUS_EXPR:
820 return determine_base_object (TREE_OPERAND (expr, 0));
824 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
828 return fold_convert (ptr_type_node, expr);
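
/* For illustration (these examples are not from the original sources): for
   an address such as &a[i] the base object determined above is &a; for a
   pointer expression p + 4 it is whatever the base object of p is (here p
   itself, if p is just an SSA name); and for a non-pointer expression such
   as i * 4 NULL_TREE is returned.  */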
/* Allocates an induction variable with given initial value BASE and step
   STEP.  */
836 alloc_iv (tree base, tree step)
838 struct iv *iv = XCNEW (struct iv);
839 gcc_assert (step != NULL_TREE);
842 iv->base_object = determine_base_object (base);
845 iv->have_use_for = false;
847 iv->ssa_name = NULL_TREE;
852 /* Sets STEP and BASE for induction variable IV. */
855 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
857 struct version_info *info = name_info (data, iv);
859 gcc_assert (!info->iv);
861 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
862 info->iv = alloc_iv (base, step);
863 info->iv->ssa_name = iv;
866 /* Finds induction variable declaration for VAR. */
869 get_iv (struct ivopts_data *data, tree var)
872 tree type = TREE_TYPE (var);
874 if (!POINTER_TYPE_P (type)
875 && !INTEGRAL_TYPE_P (type))
878 if (!name_info (data, var)->iv)
880 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
883 || !flow_bb_inside_loop_p (data->current_loop, bb))
884 set_iv (data, var, var, build_int_cst (type, 0));
887 return name_info (data, var)->iv;
890 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
891 not define a simple affine biv with nonzero step. */
894 determine_biv_step (gimple phi)
896 struct loop *loop = gimple_bb (phi)->loop_father;
897 tree name = PHI_RESULT (phi);
900 if (!is_gimple_reg (name))
903 if (!simple_iv (loop, loop, name, &iv, true))
906 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
909 /* Finds basic ivs. */
912 find_bivs (struct ivopts_data *data)
915 tree step, type, base;
917 struct loop *loop = data->current_loop;
918 gimple_stmt_iterator psi;
920 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
922 phi = gsi_stmt (psi);
924 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
927 step = determine_biv_step (phi);
931 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
932 base = expand_simple_operations (base);
933 if (contains_abnormal_ssa_name_p (base)
934 || contains_abnormal_ssa_name_p (step))
937 type = TREE_TYPE (PHI_RESULT (phi));
938 base = fold_convert (type, base);
941 if (POINTER_TYPE_P (type))
942 step = fold_convert (sizetype, step);
944 step = fold_convert (type, step);
947 set_iv (data, PHI_RESULT (phi), base, step);
954 /* Marks basic ivs. */
957 mark_bivs (struct ivopts_data *data)
961 struct iv *iv, *incr_iv;
962 struct loop *loop = data->current_loop;
964 gimple_stmt_iterator psi;
966 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
968 phi = gsi_stmt (psi);
970 iv = get_iv (data, PHI_RESULT (phi));
974 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
975 incr_iv = get_iv (data, var);
979 /* If the increment is in the subloop, ignore it. */
980 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
981 if (incr_bb->loop_father != data->current_loop
982 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
986 incr_iv->biv_p = true;
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */
994 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
997 struct loop *loop = data->current_loop;
999 iv->base = NULL_TREE;
1000 iv->step = NULL_TREE;
1002 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1005 lhs = gimple_assign_lhs (stmt);
1006 if (TREE_CODE (lhs) != SSA_NAME)
1009 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1011 iv->base = expand_simple_operations (iv->base);
1013 if (contains_abnormal_ssa_name_p (iv->base)
1014 || contains_abnormal_ssa_name_p (iv->step))
1020 /* Finds general ivs in statement STMT. */
1023 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1027 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1030 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1033 /* Finds general ivs in basic block BB. */
1036 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1038 gimple_stmt_iterator bsi;
1040 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1041 find_givs_in_stmt (data, gsi_stmt (bsi));
1044 /* Finds general ivs. */
1047 find_givs (struct ivopts_data *data)
1049 struct loop *loop = data->current_loop;
1050 basic_block *body = get_loop_body_in_dom_order (loop);
1053 for (i = 0; i < loop->num_nodes; i++)
1054 find_givs_in_bb (data, body[i]);
1058 /* For each ssa name defined in LOOP determines whether it is an induction
1059 variable and if so, its initial value and step. */
1062 find_induction_variables (struct ivopts_data *data)
1067 if (!find_bivs (data))
1073 if (dump_file && (dump_flags & TDF_DETAILS))
1075 tree niter = niter_for_single_dom_exit (data);
1079 fprintf (dump_file, " number of iterations ");
1080 print_generic_expr (dump_file, niter, TDF_SLIM);
1081 fprintf (dump_file, "\n\n");
1084 fprintf (dump_file, "Induction variables:\n\n");
1086 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1088 if (ver_info (data, i)->iv)
1089 dump_iv (dump_file, ver_info (data, i)->iv);
1096 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1098 static struct iv_use *
1099 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1100 gimple stmt, enum use_type use_type)
1102 struct iv_use *use = XCNEW (struct iv_use);
1104 use->id = n_iv_uses (data);
1105 use->type = use_type;
1109 use->related_cands = BITMAP_ALLOC (NULL);
  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
1113 iv->ssa_name = NULL_TREE;
1115 if (dump_file && (dump_flags & TDF_DETAILS))
1116 dump_use (dump_file, use);
1118 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1123 /* Checks whether OP is a loop-level invariant and if so, records it.
1124 NONLINEAR_USE is true if the invariant is used in a way we do not
1125 handle specially. */
1128 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1131 struct version_info *info;
1133 if (TREE_CODE (op) != SSA_NAME
1134 || !is_gimple_reg (op))
1137 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1139 && flow_bb_inside_loop_p (data->current_loop, bb))
1142 info = name_info (data, op);
1144 info->has_nonlin_use |= nonlinear_use;
1146 info->inv_id = ++data->max_inv_id;
1147 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1150 /* Checks whether the use OP is interesting and if so, records it. */
1152 static struct iv_use *
1153 find_interesting_uses_op (struct ivopts_data *data, tree op)
1160 if (TREE_CODE (op) != SSA_NAME)
1163 iv = get_iv (data, op);
1167 if (iv->have_use_for)
1169 use = iv_use (data, iv->use_id);
1171 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1175 if (integer_zerop (iv->step))
1177 record_invariant (data, op, true);
1180 iv->have_use_for = true;
1182 civ = XNEW (struct iv);
1185 stmt = SSA_NAME_DEF_STMT (op);
1186 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1187 || is_gimple_assign (stmt));
1189 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1190 iv->use_id = use->id;
1195 /* Given a condition in statement STMT, checks whether it is a compare
1196 of an induction variable and an invariant. If this is the case,
1197 CONTROL_VAR is set to location of the iv, BOUND to the location of
1198 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1199 induction variable descriptions, and true is returned. If this is not
1200 the case, CONTROL_VAR and BOUND are set to the arguments of the
1201 condition and false is returned. */
1204 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1205 tree **control_var, tree **bound,
1206 struct iv **iv_var, struct iv **iv_bound)
1208 /* The objects returned when COND has constant operands. */
1209 static struct iv const_iv;
1211 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1212 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1215 if (gimple_code (stmt) == GIMPLE_COND)
1217 op0 = gimple_cond_lhs_ptr (stmt);
1218 op1 = gimple_cond_rhs_ptr (stmt);
1222 op0 = gimple_assign_rhs1_ptr (stmt);
1223 op1 = gimple_assign_rhs2_ptr (stmt);
1226 zero = integer_zero_node;
1227 const_iv.step = integer_zero_node;
1229 if (TREE_CODE (*op0) == SSA_NAME)
1230 iv0 = get_iv (data, *op0);
1231 if (TREE_CODE (*op1) == SSA_NAME)
1232 iv1 = get_iv (data, *op1);
  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
1239 if (integer_zerop (iv0->step))
1241 /* Control variable may be on the other side. */
1242 tmp_op = op0; op0 = op1; op1 = tmp_op;
1243 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1245 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
    *control_var = op0;
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */
1264 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1266 tree *var_p, *bound_p;
1267 struct iv *var_iv, *civ;
1269 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1271 find_interesting_uses_op (data, *var_p);
1272 find_interesting_uses_op (data, *bound_p);
1276 civ = XNEW (struct iv);
1278 record_use (data, NULL, civ, stmt, USE_COMPARE);
1281 /* Returns true if expression EXPR is obviously invariant in LOOP,
1282 i.e. if all its operands are defined outside of the LOOP. LOOP
1283 should not be the function body. */
1286 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1291 gcc_assert (loop_depth (loop) > 0);
1293 if (is_gimple_min_invariant (expr))
1296 if (TREE_CODE (expr) == SSA_NAME)
1298 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1300 && flow_bb_inside_loop_p (loop, def_bb))
1309 len = TREE_OPERAND_LENGTH (expr);
1310 for (i = 0; i < len; i++)
1311 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1317 /* Returns true if statement STMT is obviously invariant in LOOP,
1318 i.e. if all its operands on the RHS are defined outside of the LOOP.
1319 LOOP should not be the function body. */
1322 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1327 gcc_assert (loop_depth (loop) > 0);
1329 lhs = gimple_get_lhs (stmt);
1330 for (i = 0; i < gimple_num_ops (stmt); i++)
1332 tree op = gimple_op (stmt, i);
1333 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1340 /* Cumulates the steps of indices into DATA and replaces their values with the
1341 initial ones. Returns false when the value of the index cannot be determined.
1342 Callback for for_each_index. */
1344 struct ifs_ivopts_data
1346 struct ivopts_data *ivopts_data;
1352 idx_find_step (tree base, tree *idx, void *data)
1354 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1356 tree step, iv_base, iv_step, lbound, off;
1357 struct loop *loop = dta->ivopts_data->current_loop;
1359 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1360 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
1365 if (TREE_CODE (base) == COMPONENT_REF)
1367 off = component_ref_field_offset (base);
1368 return expr_invariant_in_loop_p (loop, off);
1371 /* If base is array, first check whether we will be able to move the
1372 reference out of the loop (in order to take its address in strength
1373 reduction). In order for this to work we need both lower bound
1374 and step to be loop invariants. */
1375 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1377 /* Moreover, for a range, the size needs to be invariant as well. */
1378 if (TREE_CODE (base) == ARRAY_RANGE_REF
1379 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1382 step = array_ref_element_size (base);
1383 lbound = array_ref_low_bound (base);
1385 if (!expr_invariant_in_loop_p (loop, step)
1386 || !expr_invariant_in_loop_p (loop, lbound))
1390 if (TREE_CODE (*idx) != SSA_NAME)
1393 iv = get_iv (dta->ivopts_data, *idx);
1397 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1398 *&x[0], which is not folded and does not trigger the
1399 ARRAY_REF path below. */
1402 if (integer_zerop (iv->step))
1405 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1407 step = array_ref_element_size (base);
1409 /* We only handle addresses whose step is an integer constant. */
1410 if (TREE_CODE (step) != INTEGER_CST)
1414 /* The step for pointer arithmetics already is 1 byte. */
1415 step = build_int_cst (sizetype, 1);
1419 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1420 sizetype, &iv_base, &iv_step, dta->stmt,
1423 /* The index might wrap. */
1427 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1428 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1433 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1434 object is passed to it in DATA. */
1437 idx_record_use (tree base, tree *idx,
1440 struct ivopts_data *data = (struct ivopts_data *) vdata;
1441 find_interesting_uses_op (data, *idx);
1442 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1444 find_interesting_uses_op (data, array_ref_element_size (base));
1445 find_interesting_uses_op (data, array_ref_low_bound (base));
1450 /* If we can prove that TOP = cst * BOT for some constant cst,
1451 store cst to MUL and return true. Otherwise return false.
1452 The returned value is always sign-extended, regardless of the
1453 signedness of TOP and BOT. */
1456 constant_multiple_of (tree top, tree bot, double_int *mul)
1459 enum tree_code code;
1460 double_int res, p0, p1;
1461 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1466 if (operand_equal_p (top, bot, 0))
1468 *mul = double_int_one;
1472 code = TREE_CODE (top);
1476 mby = TREE_OPERAND (top, 1);
1477 if (TREE_CODE (mby) != INTEGER_CST)
1480 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1483 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1489 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1490 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1493 if (code == MINUS_EXPR)
1494 p1 = double_int_neg (p1);
1495 *mul = double_int_sext (double_int_add (p0, p1), precision);
1499 if (TREE_CODE (bot) != INTEGER_CST)
1502 p0 = double_int_sext (tree_to_double_int (top), precision);
1503 p1 = double_int_sext (tree_to_double_int (bot), precision);
1504 if (double_int_zero_p (p1))
1506 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1508 return double_int_zero_p (res);
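
/* A worked example (illustrative only): constant_multiple_of (x * 6, x, &mul)
   recurses through the MULT_EXPR, finds that its first operand is equal to
   BOT and sets *MUL to 6; constant_multiple_of (24, 4, &mul) divides the two
   constants and also sets *MUL to 6; constant_multiple_of (x + 1, x, &mul)
   fails, since 1 is not a multiple of x.  */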
1515 /* Returns true if memory reference REF with step STEP may be unaligned. */
1518 may_be_unaligned_p (tree ref, tree step)
1522 HOST_WIDE_INT bitsize;
1523 HOST_WIDE_INT bitpos;
1525 enum machine_mode mode;
1526 int unsignedp, volatilep;
1527 unsigned base_align;
1529 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1530 thus they are not misaligned. */
1531 if (TREE_CODE (ref) == TARGET_MEM_REF)
  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
1537 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1538 &unsignedp, &volatilep, true);
1539 base_type = TREE_TYPE (base);
1540 base_align = TYPE_ALIGN (base_type);
1542 if (mode != BLKmode)
1544 unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1546 if (base_align < mode_align
1547 || (bitpos % mode_align) != 0
1548 || (bitpos % BITS_PER_UNIT) != 0)
1552 && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1555 if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1562 /* Return true if EXPR may be non-addressable. */
1565 may_be_nonaddressable_p (tree expr)
1567 switch (TREE_CODE (expr))
1569 case TARGET_MEM_REF:
1570 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1571 target, thus they are always addressable. */
1575 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1576 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1578 case VIEW_CONVERT_EXPR:
1579 /* This kind of view-conversions may wrap non-addressable objects
1580 and make them look addressable. After some processing the
1581 non-addressability may be uncovered again, causing ADDR_EXPRs
1582 of inappropriate objects to be built. */
1583 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1584 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1587 /* ... fall through ... */
1590 case ARRAY_RANGE_REF:
1591 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1603 /* Finds addresses in *OP_P inside STMT. */
1606 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1608 tree base = *op_p, step = build_int_cst (sizetype, 0);
1610 struct ifs_ivopts_data ifs_ivopts_data;
1612 /* Do not play with volatile memory references. A bit too conservative,
1613 perhaps, but safe. */
1614 if (gimple_has_volatile_ops (stmt))
  /* Ignore bitfields for now.  Not really something terribly complicated
     anyway.  */
1619 if (TREE_CODE (base) == BIT_FIELD_REF)
1622 base = unshare_expr (base);
1624 if (TREE_CODE (base) == TARGET_MEM_REF)
1626 tree type = build_pointer_type (TREE_TYPE (base));
1630 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1632 civ = get_iv (data, TMR_BASE (base));
1636 TMR_BASE (base) = civ->base;
1639 if (TMR_INDEX (base)
1640 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1642 civ = get_iv (data, TMR_INDEX (base));
1646 TMR_INDEX (base) = civ->base;
1651 if (TMR_STEP (base))
1652 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1654 step = fold_build2 (PLUS_EXPR, type, step, astep);
1658 if (integer_zerop (step))
1660 base = tree_mem_ref_addr (type, base);
1664 ifs_ivopts_data.ivopts_data = data;
1665 ifs_ivopts_data.stmt = stmt;
1666 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1667 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1668 || integer_zerop (ifs_ivopts_data.step))
1670 step = ifs_ivopts_data.step;
1672 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1673 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1675 /* Check that the base expression is addressable. This needs
1676 to be done after substituting bases of IVs into it. */
1677 if (may_be_nonaddressable_p (base))
1680 /* Moreover, on strict alignment platforms, check that it is
1681 sufficiently aligned. */
1682 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1685 base = build_fold_addr_expr (base);
1687 /* Substituting bases of IVs into the base expression might
1688 have caused folding opportunities. */
1689 if (TREE_CODE (base) == ADDR_EXPR)
1691 tree *ref = &TREE_OPERAND (base, 0);
1692 while (handled_component_p (*ref))
1693 ref = &TREE_OPERAND (*ref, 0);
1694 if (TREE_CODE (*ref) == INDIRECT_REF)
1696 tree tem = gimple_fold_indirect_ref (TREE_OPERAND (*ref, 0));
1703 civ = alloc_iv (base, step);
1704 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1708 for_each_index (op_p, idx_record_use, data);
1711 /* Finds and records invariants used in STMT. */
1714 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1717 use_operand_p use_p;
1720 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1722 op = USE_FROM_PTR (use_p);
1723 record_invariant (data, op, false);
1727 /* Finds interesting uses of induction variables in the statement STMT. */
1730 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1733 tree op, *lhs, *rhs;
1735 use_operand_p use_p;
1736 enum tree_code code;
1738 find_invariants_stmt (data, stmt);
1740 if (gimple_code (stmt) == GIMPLE_COND)
1742 find_interesting_uses_cond (data, stmt);
1746 if (is_gimple_assign (stmt))
1748 lhs = gimple_assign_lhs_ptr (stmt);
1749 rhs = gimple_assign_rhs1_ptr (stmt);
1751 if (TREE_CODE (*lhs) == SSA_NAME)
1753 /* If the statement defines an induction variable, the uses are not
1754 interesting by themselves. */
1756 iv = get_iv (data, *lhs);
1758 if (iv && !integer_zerop (iv->step))
1762 code = gimple_assign_rhs_code (stmt);
1763 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1764 && (REFERENCE_CLASS_P (*rhs)
1765 || is_gimple_val (*rhs)))
1767 if (REFERENCE_CLASS_P (*rhs))
1768 find_interesting_uses_address (data, stmt, rhs);
1770 find_interesting_uses_op (data, *rhs);
1772 if (REFERENCE_CLASS_P (*lhs))
1773 find_interesting_uses_address (data, stmt, lhs);
1776 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1778 find_interesting_uses_cond (data, stmt);
  /* TODO -- we should also handle address uses of type
     memory = call (whatever); and call (memory).  */
1791 if (gimple_code (stmt) == GIMPLE_PHI
1792 && gimple_bb (stmt) == data->current_loop->header)
1794 iv = get_iv (data, PHI_RESULT (stmt));
1796 if (iv && !integer_zerop (iv->step))
1800 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1802 op = USE_FROM_PTR (use_p);
1804 if (TREE_CODE (op) != SSA_NAME)
1807 iv = get_iv (data, op);
1811 find_interesting_uses_op (data, op);
1815 /* Finds interesting uses of induction variables outside of loops
1816 on loop exit edge EXIT. */
1819 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1822 gimple_stmt_iterator psi;
1825 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1827 phi = gsi_stmt (psi);
1828 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1829 if (is_gimple_reg (def))
1830 find_interesting_uses_op (data, def);
1834 /* Finds uses of the induction variables that are interesting. */
1837 find_interesting_uses (struct ivopts_data *data)
1840 gimple_stmt_iterator bsi;
1841 basic_block *body = get_loop_body (data->current_loop);
1843 struct version_info *info;
1846 if (dump_file && (dump_flags & TDF_DETAILS))
1847 fprintf (dump_file, "Uses:\n\n");
1849 for (i = 0; i < data->current_loop->num_nodes; i++)
1854 FOR_EACH_EDGE (e, ei, bb->succs)
1855 if (e->dest != EXIT_BLOCK_PTR
1856 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1857 find_interesting_uses_outside (data, e);
1859 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1860 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1861 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1862 if (!is_gimple_debug (gsi_stmt (bsi)))
1863 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1866 if (dump_file && (dump_flags & TDF_DETAILS))
1870 fprintf (dump_file, "\n");
1872 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1874 info = ver_info (data, i);
1877 fprintf (dump_file, " ");
1878 print_generic_expr (dump_file, info->name, TDF_SLIM);
1879 fprintf (dump_file, " is invariant (%d)%s\n",
1880 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1884 fprintf (dump_file, "\n");
1890 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1891 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1892 we are at the top-level of the processed address. */
1895 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1896 unsigned HOST_WIDE_INT *offset)
1898 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1899 enum tree_code code;
1900 tree type, orig_type = TREE_TYPE (expr);
1901 unsigned HOST_WIDE_INT off0, off1, st;
1902 tree orig_expr = expr;
1906 type = TREE_TYPE (expr);
1907 code = TREE_CODE (expr);
1913 if (!cst_and_fits_in_hwi (expr)
1914 || integer_zerop (expr))
1917 *offset = int_cst_value (expr);
1918 return build_int_cst (orig_type, 0);
1920 case POINTER_PLUS_EXPR:
1923 op0 = TREE_OPERAND (expr, 0);
1924 op1 = TREE_OPERAND (expr, 1);
1926 op0 = strip_offset_1 (op0, false, false, &off0);
1927 op1 = strip_offset_1 (op1, false, false, &off1);
1929 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1930 if (op0 == TREE_OPERAND (expr, 0)
1931 && op1 == TREE_OPERAND (expr, 1))
1934 if (integer_zerop (op1))
1936 else if (integer_zerop (op0))
1938 if (code == MINUS_EXPR)
1939 expr = fold_build1 (NEGATE_EXPR, type, op1);
1944 expr = fold_build2 (code, type, op0, op1);
1946 return fold_convert (orig_type, expr);
1949 op1 = TREE_OPERAND (expr, 1);
1950 if (!cst_and_fits_in_hwi (op1))
1953 op0 = TREE_OPERAND (expr, 0);
1954 op0 = strip_offset_1 (op0, false, false, &off0);
1955 if (op0 == TREE_OPERAND (expr, 0))
1958 *offset = off0 * int_cst_value (op1);
1959 if (integer_zerop (op0))
1962 expr = fold_build2 (MULT_EXPR, type, op0, op1);
1964 return fold_convert (orig_type, expr);
1967 case ARRAY_RANGE_REF:
1971 step = array_ref_element_size (expr);
1972 if (!cst_and_fits_in_hwi (step))
1975 st = int_cst_value (step);
1976 op1 = TREE_OPERAND (expr, 1);
1977 op1 = strip_offset_1 (op1, false, false, &off1);
1978 *offset = off1 * st;
1981 && integer_zerop (op1))
1983 /* Strip the component reference completely. */
1984 op0 = TREE_OPERAND (expr, 0);
1985 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1995 tmp = component_ref_field_offset (expr);
1997 && cst_and_fits_in_hwi (tmp))
1999 /* Strip the component reference completely. */
2000 op0 = TREE_OPERAND (expr, 0);
2001 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2002 *offset = off0 + int_cst_value (tmp);
2008 op0 = TREE_OPERAND (expr, 0);
2009 op0 = strip_offset_1 (op0, true, true, &off0);
2012 if (op0 == TREE_OPERAND (expr, 0))
2015 expr = build_fold_addr_expr (op0);
2016 return fold_convert (orig_type, expr);
2019 inside_addr = false;
      /* Default handling of expressions for which we want to recurse into
         the first operand.  */
2028 op0 = TREE_OPERAND (expr, 0);
2029 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2032 if (op0 == TREE_OPERAND (expr, 0)
2033 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2036 expr = copy_node (expr);
2037 TREE_OPERAND (expr, 0) = op0;
2039 TREE_OPERAND (expr, 1) = op1;
  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
2044 expr = fold_convert (orig_type, expr);
2049 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2052 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2054 return strip_offset_1 (expr, false, false, offset);
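
/* For illustration (not from the original comments): assuming a declaration
   like "int a[100];", stripping offsets from the address &a[i + 3] would
   return &a[i] and set *OFFSET to 3 * sizeof (int), while stripping them
   from a POINTER_PLUS_EXPR such as p + 8 would return p and set *OFFSET
   to 8.  */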
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */
2062 generic_type_for (tree type)
2064 if (POINTER_TYPE_P (type))
2065 return unsigned_type_for (type);
2067 if (TYPE_UNSIGNED (type))
2070 return unsigned_type_for (type);
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap into which we should store it.  */
2076 static struct ivopts_data *fd_ivopts_data;
2078 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2080 bitmap *depends_on = (bitmap *) data;
2081 struct version_info *info;
2083 if (TREE_CODE (*expr_p) != SSA_NAME)
2085 info = name_info (fd_ivopts_data, *expr_p);
2087 if (!info->inv_id || info->has_nonlin_use)
2091 *depends_on = BITMAP_ALLOC (NULL);
2092 bitmap_set_bit (*depends_on, info->inv_id);
2097 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2098 position to POS. If USE is not NULL, the candidate is set as related to
2099 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2100 replacement of the final value of the iv by a direct computation. */
2102 static struct iv_cand *
2103 add_candidate_1 (struct ivopts_data *data,
2104 tree base, tree step, bool important, enum iv_position pos,
2105 struct iv_use *use, gimple incremented_at)
2108 struct iv_cand *cand = NULL;
2109 tree type, orig_type;
2113 orig_type = TREE_TYPE (base);
2114 type = generic_type_for (orig_type);
2115 if (type != orig_type)
2117 base = fold_convert (type, base);
2118 step = fold_convert (type, step);
2122 for (i = 0; i < n_iv_cands (data); i++)
2124 cand = iv_cand (data, i);
2126 if (cand->pos != pos)
2129 if (cand->incremented_at != incremented_at
2130 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2131 && cand->ainc_use != use))
2145 if (operand_equal_p (base, cand->iv->base, 0)
2146 && operand_equal_p (step, cand->iv->step, 0))
2150 if (i == n_iv_cands (data))
2152 cand = XCNEW (struct iv_cand);
2158 cand->iv = alloc_iv (base, step);
2161 if (pos != IP_ORIGINAL && cand->iv)
2163 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2164 cand->var_after = cand->var_before;
2166 cand->important = important;
2167 cand->incremented_at = incremented_at;
2168 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2171 && TREE_CODE (step) != INTEGER_CST)
2173 fd_ivopts_data = data;
2174 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2177 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2178 cand->ainc_use = use;
2180 cand->ainc_use = NULL;
2182 if (dump_file && (dump_flags & TDF_DETAILS))
2183 dump_cand (dump_file, cand);
2186 if (important && !cand->important)
2188 cand->important = true;
2189 if (dump_file && (dump_flags & TDF_DETAILS))
2190 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2195 bitmap_set_bit (use->related_cands, i);
2196 if (dump_file && (dump_flags & TDF_DETAILS))
2197 fprintf (dump_file, "Candidate %d is related to use %d\n",
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */
2214 allow_ip_end_pos_p (struct loop *loop)
2216 if (!ip_normal_pos (loop))
2219 if (!empty_block_p (ip_end_pos (loop)))
2225 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2226 Important field is set to IMPORTANT. */
2229 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2230 bool important, struct iv_use *use)
2232 basic_block use_bb = gimple_bb (use->stmt);
2233 enum machine_mode mem_mode;
2234 unsigned HOST_WIDE_INT cstepi;
2236 /* If we insert the increment in any position other than the standard
2237 ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
2240 if (use_bb->loop_father != data->current_loop
2241 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2242 || stmt_could_throw_p (use->stmt)
2243 || !cst_and_fits_in_hwi (step))
2246 cstepi = int_cst_value (step);
2248 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2249 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2250 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2252 enum tree_code code = MINUS_EXPR;
2254 tree new_step = step;
2256 if (POINTER_TYPE_P (TREE_TYPE (base)))
2258 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2259 code = POINTER_PLUS_EXPR;
2262 new_step = fold_convert (TREE_TYPE (base), new_step);
2263 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2264 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2267 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2268 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2270 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2275 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2276 position to POS. If USE is not NULL, the candidate is set as related to
2277 it. The candidate computation is scheduled on all available positions. */
2280 add_candidate (struct ivopts_data *data,
2281 tree base, tree step, bool important, struct iv_use *use)
2283 if (ip_normal_pos (data->current_loop))
2284 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2285 if (ip_end_pos (data->current_loop)
2286 && allow_ip_end_pos_p (data->current_loop))
2287 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2289 if (use != NULL && use->type == USE_ADDRESS)
2290 add_autoinc_candidates (data, base, step, important, use);
2293 /* Add a standard "0 + 1 * iteration" iv candidate for a
2294 type with SIZE bits. */
2297 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2300 tree type = lang_hooks.types.type_for_size (size, true);
2301 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2305 /* Adds standard iv candidates. */
2308 add_standard_iv_candidates (struct ivopts_data *data)
2310 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2312 /* The same for a double-integer type if it is still fast enough. */
2313 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2314 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
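
/* For illustration (target parameters assumed, not taken from the sources):
   on a target where int is 32 bits and the word size is 64 bits, the two
   calls above add the candidate "0 + 1 * iteration" both as a 32-bit and as
   a 64-bit unsigned induction variable; on a 32-bit target only the first
   one is added.  */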
/* Adds candidates based on the old induction variable IV.  */
2321 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2325 struct iv_cand *cand;
2327 add_candidate (data, iv->base, iv->step, true, NULL);
2329 /* The same, but with initial value zero. */
2330 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2331 add_candidate (data, size_int (0), iv->step, true, NULL);
2333 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2334 iv->step, true, NULL);
2336 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2337 if (gimple_code (phi) == GIMPLE_PHI)
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
2341 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2342 cand = add_candidate_1 (data,
2343 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2344 SSA_NAME_DEF_STMT (def));
2345 cand->var_before = iv->ssa_name;
2346 cand->var_after = def;
2350 /* Adds candidates based on the old induction variables. */
2353 add_old_ivs_candidates (struct ivopts_data *data)
2359 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2361 iv = ver_info (data, i)->iv;
2362 if (iv && iv->biv_p && !integer_zerop (iv->step))
2363 add_old_iv_candidates (data, iv);
2367 /* Adds candidates based on the value of the induction variable IV and USE. */
2370 add_iv_value_candidates (struct ivopts_data *data,
2371 struct iv *iv, struct iv_use *use)
2373 unsigned HOST_WIDE_INT offset;
2377 add_candidate (data, iv->base, iv->step, false, use);
2379 /* The same, but with initial value zero. Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
2382 basetype = TREE_TYPE (iv->base);
2383 if (POINTER_TYPE_P (basetype))
2384 basetype = sizetype;
2385 add_candidate (data, build_int_cst (basetype, 0),
2386 iv->step, true, use);
2388 /* Third, try removing the constant offset. Make sure to even
2389 add a candidate for &a[0] vs. (T *)&a. */
2390 base = strip_offset (iv->base, &offset);
2392 || base != iv->base)
2393 add_candidate (data, base, iv->step, false, use);
2396 /* Adds candidates based on the uses. */
2399 add_derived_ivs_candidates (struct ivopts_data *data)
2403 for (i = 0; i < n_iv_uses (data); i++)
2405 struct iv_use *use = iv_use (data, i);
2412 case USE_NONLINEAR_EXPR:
2415 /* Just add the ivs based on the value of the iv used here. */
2416 add_iv_value_candidates (data, use->iv, use);
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */
2429 record_important_candidates (struct ivopts_data *data)
2434 for (i = 0; i < n_iv_cands (data); i++)
2436 struct iv_cand *cand = iv_cand (data, i);
2438 if (cand->important)
2439 bitmap_set_bit (data->important_candidates, i);
2442 data->consider_all_candidates = (n_iv_cands (data)
2443 <= CONSIDER_ALL_CANDIDATES_BOUND);
2445 if (data->consider_all_candidates)
2447 /* We will not need "related_cands" bitmaps in this case,
2448 so release them to decrease peak memory consumption. */
2449 for (i = 0; i < n_iv_uses (data); i++)
2451 use = iv_use (data, i);
2452 BITMAP_FREE (use->related_cands);
2457 /* Add important candidates to the related_cands bitmaps. */
2458 for (i = 0; i < n_iv_uses (data); i++)
2459 bitmap_ior_into (iv_use (data, i)->related_cands,
2460 data->important_candidates);
2464 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2465 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2466 we allocate a simple list to every use. */
2469 alloc_use_cost_map (struct ivopts_data *data)
2471 unsigned i, size, s, j;
2473 for (i = 0; i < n_iv_uses (data); i++)
2475 struct iv_use *use = iv_use (data, i);
2478 if (data->consider_all_candidates)
2479 size = n_iv_cands (data);
2483 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
          /* Round up to a power of two, so that taking the modulo by it
             is fast.  */
2489 for (size = 1; size < s; size <<= 1)
2493 use->n_map_members = size;
2494 use->cost_map = XCNEWVEC (struct cost_pair, size);
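
/* For illustration (numbers made up): a use with five related candidates gets
   a cost_map of size 8 (the next power of two), and set_use_iv_cost /
   get_use_iv_cost below then probe the map starting at slot
   cand->id & (8 - 1), walking forward until a free slot or the candidate is
   found -- a simple open-addressing hash keyed by the candidate id.  */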
2498 /* Returns description of computation cost of expression whose runtime
2499 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2502 new_cost (unsigned runtime, unsigned complexity)
2506 cost.cost = runtime;
2507 cost.complexity = complexity;
2512 /* Adds costs COST1 and COST2. */
2515 add_costs (comp_cost cost1, comp_cost cost2)
2517 cost1.cost += cost2.cost;
2518 cost1.complexity += cost2.complexity;
/* Subtracts cost COST2 from COST1.  */
2525 sub_costs (comp_cost cost1, comp_cost cost2)
2527 cost1.cost -= cost2.cost;
2528 cost1.complexity -= cost2.complexity;
2533 /* Returns a negative number if COST1 < COST2, a positive number if
2534 COST1 > COST2, and 0 if COST1 = COST2. */
2537 compare_costs (comp_cost cost1, comp_cost cost2)
2539 if (cost1.cost == cost2.cost)
2540 return cost1.complexity - cost2.complexity;
2542 return cost1.cost - cost2.cost;
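
/* A worked example (illustrative only): with the definitions above,
   compare_costs ({4, 1}, {3, 2}) is positive, so the second cost is cheaper
   even though it is more complex, while compare_costs ({4, 1}, {4, 2}) is
   negative, i.e. equal runtime costs are ordered by complexity.  */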
2545 /* Returns true if COST is infinite. */
2548 infinite_cost_p (comp_cost cost)
2550 return cost.cost == INFTY;
/* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */
2558 set_use_iv_cost (struct ivopts_data *data,
2559 struct iv_use *use, struct iv_cand *cand,
2560 comp_cost cost, bitmap depends_on, tree value)
2564 if (infinite_cost_p (cost))
2566 BITMAP_FREE (depends_on);
2570 if (data->consider_all_candidates)
2572 use->cost_map[cand->id].cand = cand;
2573 use->cost_map[cand->id].cost = cost;
2574 use->cost_map[cand->id].depends_on = depends_on;
2575 use->cost_map[cand->id].value = value;
2579 /* n_map_members is a power of two, so this computes modulo. */
2580 s = cand->id & (use->n_map_members - 1);
2581 for (i = s; i < use->n_map_members; i++)
2582 if (!use->cost_map[i].cand)
2584 for (i = 0; i < s; i++)
2585 if (!use->cost_map[i].cand)
2591 use->cost_map[i].cand = cand;
2592 use->cost_map[i].cost = cost;
2593 use->cost_map[i].depends_on = depends_on;
2594 use->cost_map[i].value = value;
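/* Both set_use_iv_cost above and get_use_iv_cost below probe the map with
   simple open addressing and linear probing with wrap-around.  For instance
   (hypothetical sizes), with n_map_members == 8 and cand->id == 11 the
   search starts at slot 11 & 7 == 3 and scans slots 3..7, then 0..2, for a
   free or matching entry.  */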
2597 /* Gets cost of (USE, CANDIDATE) pair. */
2599 static struct cost_pair *
2600 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2601 struct iv_cand *cand)
2604 struct cost_pair *ret;
2609 if (data->consider_all_candidates)
2611 ret = use->cost_map + cand->id;
2618 /* n_map_members is a power of two, so this computes modulo. */
2619 s = cand->id & (use->n_map_members - 1);
2620 for (i = s; i < use->n_map_members; i++)
2621 if (use->cost_map[i].cand == cand)
2622 return use->cost_map + i;
2624 for (i = 0; i < s; i++)
2625 if (use->cost_map[i].cand == cand)
2626 return use->cost_map + i;
2631 /* Returns an estimate of the cost of computing SEQ. */
2634 seq_cost (rtx seq, bool speed)
2639 for (; seq; seq = NEXT_INSN (seq))
2641 set = single_set (seq);
2643 cost += rtx_cost (set, SET, speed);
2651 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2653 produce_memory_decl_rtl (tree obj, int *regno)
2655 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2656 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2660 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2662 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2663 x = gen_rtx_SYMBOL_REF (address_mode, name);
2664 SET_SYMBOL_REF_DECL (x, obj);
2665 x = gen_rtx_MEM (DECL_MODE (obj), x);
2666 set_mem_addr_space (x, as);
2667 targetm.encode_section_info (obj, x, true);
2671 x = gen_raw_REG (address_mode, (*regno)++);
2672 x = gen_rtx_MEM (DECL_MODE (obj), x);
2673 set_mem_addr_space (x, as);
2679 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2680 walk_tree. DATA contains the actual fake register number. */
2683 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2685 tree obj = NULL_TREE;
2687 int *regno = (int *) data;
2689 switch (TREE_CODE (*expr_p))
2692 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2693 handled_component_p (*expr_p);
2694 expr_p = &TREE_OPERAND (*expr_p, 0))
2697 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2698 x = produce_memory_decl_rtl (obj, regno);
2703 obj = SSA_NAME_VAR (*expr_p);
2704 if (!DECL_RTL_SET_P (obj))
2705 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2714 if (DECL_RTL_SET_P (obj))
2717 if (DECL_MODE (obj) == BLKmode)
2718 x = produce_memory_decl_rtl (obj, regno);
2720 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2730 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2731 SET_DECL_RTL (obj, x);
2737 /* Determines cost of the computation of EXPR. */
2740 computation_cost (tree expr, bool speed)
2743 tree type = TREE_TYPE (expr);
2745 /* Avoid using hard regs in ways which may be unsupported. */
2746 int regno = LAST_VIRTUAL_REGISTER + 1;
2747 struct cgraph_node *node = cgraph_node (current_function_decl);
2748 enum node_frequency real_frequency = node->frequency;
2750 node->frequency = NODE_FREQUENCY_NORMAL;
2751 crtl->maybe_hot_insn_p = speed;
2752 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2754 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2757 default_rtl_profile ();
2758 node->frequency = real_frequency;
2760 cost = seq_cost (seq, speed);
2762 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2763 TYPE_ADDR_SPACE (type), speed);
2768 /* Returns variable containing the value of candidate CAND at statement AT. */
2771 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2773 if (stmt_after_increment (loop, cand, stmt))
2774 return cand->var_after;
2776 return cand->var_before;
2779 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2780 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2783 tree_int_cst_sign_bit (const_tree t)
2785 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2786 unsigned HOST_WIDE_INT w;
2788 if (bitno < HOST_BITS_PER_WIDE_INT)
2789 w = TREE_INT_CST_LOW (t);
2792 w = TREE_INT_CST_HIGH (t);
2793 bitno -= HOST_BITS_PER_WIDE_INT;
2796 return (w >> bitno) & 1;
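/* For example, for a 16-bit type BITNO is 15, so for the constant 0x8000 the
   function returns 1 and for 0x7fff it returns 0; for types wider than
   HOST_BITS_PER_WIDE_INT the bit is read from TREE_INT_CST_HIGH instead.  */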
2799 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2800 same precision, at least as wide as the precision of TYPE, stores
2801 BA to A and BB to B, and returns the type of BA. Otherwise, returns the type of A. */
2805 determine_common_wider_type (tree *a, tree *b)
2807 tree wider_type = NULL;
2809 tree atype = TREE_TYPE (*a);
2811 if (CONVERT_EXPR_P (*a))
2813 suba = TREE_OPERAND (*a, 0);
2814 wider_type = TREE_TYPE (suba);
2815 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2821 if (CONVERT_EXPR_P (*b))
2823 subb = TREE_OPERAND (*b, 0);
2824 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2835 /* Determines the expression by which USE is expressed from induction variable
2836 CAND at statement AT in LOOP. The expression is stored in decomposed
2837 form in AFF. Returns false if USE cannot be expressed using CAND. */
2840 get_computation_aff (struct loop *loop,
2841 struct iv_use *use, struct iv_cand *cand, gimple at,
2842 struct affine_tree_combination *aff)
2844 tree ubase = use->iv->base;
2845 tree ustep = use->iv->step;
2846 tree cbase = cand->iv->base;
2847 tree cstep = cand->iv->step, cstep_common;
2848 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2849 tree common_type, var;
2851 aff_tree cbase_aff, var_aff;
2854 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2856 /* We do not have enough precision to express the values of USE. */
2860 var = var_at_stmt (loop, cand, at);
2861 uutype = unsigned_type_for (utype);
2863 /* If the conversion is not a no-op, perform it. */
2864 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2866 cstep = fold_convert (uutype, cstep);
2867 cbase = fold_convert (uutype, cbase);
2868 var = fold_convert (uutype, var);
2871 if (!constant_multiple_of (ustep, cstep, &rat))
2874 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2875 type, we achieve better folding by computing their difference in this
2876 wider type and casting the result to UUTYPE. We do not need to worry about
2877 overflows, as all the arithmetic will in the end be performed in UUTYPE anyway. */
2879 common_type = determine_common_wider_type (&ubase, &cbase);
2881 /* use = ubase - ratio * cbase + ratio * var. */
2882 tree_to_aff_combination (ubase, common_type, aff);
2883 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2884 tree_to_aff_combination (var, uutype, &var_aff);
2886 /* We need to shift the value if we are after the increment. */
2887 if (stmt_after_increment (loop, cand, at))
2891 if (common_type != uutype)
2892 cstep_common = fold_convert (common_type, cstep);
2894 cstep_common = cstep;
2896 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2897 aff_combination_add (&cbase_aff, &cstep_aff);
2900 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2901 aff_combination_add (aff, &cbase_aff);
2902 if (common_type != uutype)
2903 aff_combination_convert (aff, uutype);
2905 aff_combination_scale (&var_aff, rat);
2906 aff_combination_add (aff, &var_aff);
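/* A worked example (hypothetical ivs): for a use with base &a[0] and step 4
   based on a candidate with base 0 and step 1, the ratio is 4 and the
   combination built above is &a[0] - 4 * 0 + 4 * var, i.e. the array address
   is recovered by scaling the candidate variable.  */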
2911 /* Determines the expression by which USE is expressed from induction variable
2912 CAND at statement AT in LOOP. The computation is unshared. */
2915 get_computation_at (struct loop *loop,
2916 struct iv_use *use, struct iv_cand *cand, gimple at)
2919 tree type = TREE_TYPE (use->iv->base);
2921 if (!get_computation_aff (loop, use, cand, at, &aff))
2923 unshare_aff_combination (&aff);
2924 return fold_convert (type, aff_combination_to_tree (&aff));
2927 /* Determines the expression by which USE is expressed from induction variable
2928 CAND in LOOP. The computation is unshared. */
2931 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2933 return get_computation_at (loop, use, cand, use->stmt);
2936 /* Returns cost of addition in MODE. */
2939 add_cost (enum machine_mode mode, bool speed)
2941 static unsigned costs[NUM_MACHINE_MODES];
2949 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2950 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2951 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2956 cost = seq_cost (seq, speed);
2962 if (dump_file && (dump_flags & TDF_DETAILS))
2963 fprintf (dump_file, "Addition in %s costs %d\n",
2964 GET_MODE_NAME (mode), cost);
2968 /* Entry in a hashtable of already known costs for multiplication. */
2971 HOST_WIDE_INT cst; /* The constant to multiply by. */
2972 enum machine_mode mode; /* In mode. */
2973 unsigned cost; /* The cost. */
2976 /* Computes the hash value for ENTRY. */
2979 mbc_entry_hash (const void *entry)
2981 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2983 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2986 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2989 mbc_entry_eq (const void *entry1, const void *entry2)
2991 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2992 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2994 return (e1->mode == e2->mode
2995 && e1->cst == e2->cst);
2998 /* Returns cost of multiplication by constant CST in MODE. */
3001 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3003 static htab_t costs;
3004 struct mbc_entry **cached, act;
3009 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3013 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3015 return (*cached)->cost;
3017 *cached = XNEW (struct mbc_entry);
3018 (*cached)->mode = mode;
3019 (*cached)->cst = cst;
3022 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3023 gen_int_mode (cst, mode), NULL_RTX, 0);
3027 cost = seq_cost (seq, speed);
3029 if (dump_file && (dump_flags & TDF_DETAILS))
3030 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3031 (int) cst, GET_MODE_NAME (mode), cost);
3033 (*cached)->cost = cost;
3038 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3039 validity for a memory reference accessing memory of mode MODE in
3040 address space AS. */
3042 DEF_VEC_P (sbitmap);
3043 DEF_VEC_ALLOC_P (sbitmap, heap);
3046 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3049 #define MAX_RATIO 128
3050 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3051 static VEC (sbitmap, heap) *valid_mult_list;
3054 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3055 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3057 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3060 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3061 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3065 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3066 sbitmap_zero (valid_mult);
3067 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3068 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3070 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3071 if (memory_address_addr_space_p (mode, addr, as))
3072 SET_BIT (valid_mult, i + MAX_RATIO);
3075 if (dump_file && (dump_flags & TDF_DETAILS))
3077 fprintf (dump_file, " allowed multipliers:");
3078 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3079 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3080 fprintf (dump_file, " %d", (int) i);
3081 fprintf (dump_file, "\n");
3082 fprintf (dump_file, "\n");
3085 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3088 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3091 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
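/* For instance, on a target whose addressing modes allow base + index *
   {1, 2, 4, 8} (a purely illustrative assumption), only those four bits of
   VALID_MULT would end up set for positive ratios, and any RATIO outside
   [-MAX_RATIO, MAX_RATIO] is rejected without consulting the bitmap.  */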
3094 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3095 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3096 variable is omitted. Compute the cost for a memory reference that accesses
3097 a memory location of mode MEM_MODE in address space AS.
3099 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3100 size of MEM_MODE / RATIO) is available. To make this determination, we
3101 look at the size of the increment to be made, which is given in CSTEP.
3102 CSTEP may be zero if the step is unknown.
3103 STMT_AFTER_INC is true iff the statement we're looking at is after the
3104 increment of the original biv.
3106 TODO -- there must be some better way. This all is quite crude. */
3110 HOST_WIDE_INT min_offset, max_offset;
3111 unsigned costs[2][2][2][2];
3112 } *address_cost_data;
3114 DEF_VEC_P (address_cost_data);
3115 DEF_VEC_ALLOC_P (address_cost_data, heap);
3118 get_address_cost (bool symbol_present, bool var_present,
3119 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3120 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3121 addr_space_t as, bool speed,
3122 bool stmt_after_inc, bool *may_autoinc)
3124 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3125 static VEC(address_cost_data, heap) *address_cost_data_list;
3126 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3127 address_cost_data data;
3128 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3129 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3130 unsigned cost, acost, complexity;
3131 bool offset_p, ratio_p, autoinc;
3132 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3133 unsigned HOST_WIDE_INT mask;
3136 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3137 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3140 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3144 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3145 HOST_WIDE_INT rat, off;
3146 int old_cse_not_expected;
3147 unsigned sym_p, var_p, off_p, rat_p, add_c;
3148 rtx seq, addr, base;
3151 data = (address_cost_data) xcalloc (1, sizeof (*data));
3153 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3155 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3156 for (i = start; i <= 1 << 20; i <<= 1)
3158 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3159 if (!memory_address_addr_space_p (mem_mode, addr, as))
3162 data->max_offset = i == start ? 0 : i >> 1;
3163 off = data->max_offset;
3165 for (i = start; i <= 1 << 20; i <<= 1)
3167 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3168 if (!memory_address_addr_space_p (mem_mode, addr, as))
3171 data->min_offset = i == start ? 0 : -(i >> 1);
3173 if (dump_file && (dump_flags & TDF_DETAILS))
3175 fprintf (dump_file, "get_address_cost:\n");
3176 fprintf (dump_file, " min offset %s %d\n",
3177 GET_MODE_NAME (mem_mode),
3178 (int) data->min_offset);
3179 fprintf (dump_file, " max offset %s %d\n",
3180 GET_MODE_NAME (mem_mode),
3181 (int) data->max_offset);
3185 for (i = 2; i <= MAX_RATIO; i++)
3186 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3192 /* Compute the cost of various addressing modes. */
3194 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3195 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3197 if (HAVE_PRE_DECREMENT)
3199 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3200 has_predec[mem_mode]
3201 = memory_address_addr_space_p (mem_mode, addr, as);
3203 if (HAVE_POST_DECREMENT)
3205 addr = gen_rtx_POST_DEC (address_mode, reg0);
3206 has_postdec[mem_mode]
3207 = memory_address_addr_space_p (mem_mode, addr, as);
3209 if (HAVE_PRE_INCREMENT)
3211 addr = gen_rtx_PRE_INC (address_mode, reg0);
3212 has_preinc[mem_mode]
3213 = memory_address_addr_space_p (mem_mode, addr, as);
3215 if (HAVE_POST_INCREMENT)
3217 addr = gen_rtx_POST_INC (address_mode, reg0);
3218 has_postinc[mem_mode]
3219 = memory_address_addr_space_p (mem_mode, addr, as);
3221 for (i = 0; i < 16; i++)
3224 var_p = (i >> 1) & 1;
3225 off_p = (i >> 2) & 1;
3226 rat_p = (i >> 3) & 1;
3230 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3231 gen_int_mode (rat, address_mode));
3234 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3238 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3239 /* ??? We can run into trouble with some backends by presenting
3240 them with symbols which haven't been properly passed through
3241 targetm.encode_section_info. By setting the local bit, we
3242 enhance the probability of things working. */
3243 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3246 base = gen_rtx_fmt_e (CONST, address_mode,
3248 (PLUS, address_mode, base,
3249 gen_int_mode (off, address_mode)));
3252 base = gen_int_mode (off, address_mode);
3257 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3260 /* To avoid splitting addressing modes, pretend that no cse will take place. */
3262 old_cse_not_expected = cse_not_expected;
3263 cse_not_expected = true;
3264 addr = memory_address_addr_space (mem_mode, addr, as);
3265 cse_not_expected = old_cse_not_expected;
3269 acost = seq_cost (seq, speed);
3270 acost += address_cost (addr, mem_mode, as, speed);
3274 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3277 /* On some targets, it is quite expensive to load a symbol into a register,
3278 which makes addresses that contain symbols look much more expensive.
3279 However, the symbol will have to be loaded in any case before the
3280 loop (and quite likely we have it in a register already), so it does not
3281 make much sense to penalize them too heavily. So make some final
3282 tweaks for the SYMBOL_PRESENT modes:
3284 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3285 var is cheaper, use this mode with a small penalty.
3286 If VAR_PRESENT is true, check whether the mode with
3287 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3288 if this is the case, use it. */
3289 add_c = add_cost (address_mode, speed);
3290 for (i = 0; i < 8; i++)
3293 off_p = (i >> 1) & 1;
3294 rat_p = (i >> 2) & 1;
3296 acost = data->costs[0][1][off_p][rat_p] + 1;
3300 if (acost < data->costs[1][var_p][off_p][rat_p])
3301 data->costs[1][var_p][off_p][rat_p] = acost;
3304 if (dump_file && (dump_flags & TDF_DETAILS))
3306 fprintf (dump_file, "Address costs:\n");
3308 for (i = 0; i < 16; i++)
3311 var_p = (i >> 1) & 1;
3312 off_p = (i >> 2) & 1;
3313 rat_p = (i >> 3) & 1;
3315 fprintf (dump_file, " ");
3317 fprintf (dump_file, "sym + ");
3319 fprintf (dump_file, "var + ");
3321 fprintf (dump_file, "cst + ");
3323 fprintf (dump_file, "rat * ");
3325 acost = data->costs[sym_p][var_p][off_p][rat_p];
3326 fprintf (dump_file, "index costs %d\n", acost);
3328 if (has_predec[mem_mode] || has_postdec[mem_mode]
3329 || has_preinc[mem_mode] || has_postinc[mem_mode])
3330 fprintf (dump_file, " May include autoinc/dec\n");
3331 fprintf (dump_file, "\n");
3334 VEC_replace (address_cost_data, address_cost_data_list,
3338 bits = GET_MODE_BITSIZE (address_mode);
3339 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3341 if ((offset >> (bits - 1) & 1))
3346 msize = GET_MODE_SIZE (mem_mode);
3347 autoinc_offset = offset;
3349 autoinc_offset += ratio * cstep;
3350 if (symbol_present || var_present || ratio != 1)
3352 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3354 || (has_postdec[mem_mode] && autoinc_offset == 0
3356 || (has_preinc[mem_mode] && autoinc_offset == msize
3358 || (has_predec[mem_mode] && autoinc_offset == -msize
3359 && msize == -cstep))
3363 offset_p = (s_offset != 0
3364 && data->min_offset <= s_offset
3365 && s_offset <= data->max_offset);
3366 ratio_p = (ratio != 1
3367 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3369 if (ratio != 1 && !ratio_p)
3370 cost += multiply_by_cost (ratio, address_mode, speed);
3372 if (s_offset && !offset_p && !symbol_present)
3373 cost += add_cost (address_mode, speed);
3376 *may_autoinc = autoinc;
3377 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3378 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3379 return new_cost (cost + acost, complexity);
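/* The COSTS table above is indexed by four presence bits; e.g. the loop
   index i == 13 (binary 1101) corresponds to sym_p = 1, var_p = 0,
   off_p = 1, rat_p = 1, i.e. an address of the shape
   symbol + offset + ratio * index.  The returned complexity simply counts
   how many of the four parts are present.  */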
3382 /* Estimates cost of forcing expression EXPR into a variable. */
3385 force_expr_to_var_cost (tree expr, bool speed)
3387 static bool costs_initialized = false;
3388 static unsigned integer_cost [2];
3389 static unsigned symbol_cost [2];
3390 static unsigned address_cost [2];
3392 comp_cost cost0, cost1, cost;
3393 enum machine_mode mode;
3395 if (!costs_initialized)
3397 tree type = build_pointer_type (integer_type_node);
3402 var = create_tmp_var_raw (integer_type_node, "test_var");
3403 TREE_STATIC (var) = 1;
3404 x = produce_memory_decl_rtl (var, NULL);
3405 SET_DECL_RTL (var, x);
3407 addr = build1 (ADDR_EXPR, type, var);
3410 for (i = 0; i < 2; i++)
3412 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3415 symbol_cost[i] = computation_cost (addr, i) + 1;
3418 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3420 build_int_cst (sizetype, 2000)), i) + 1;
3421 if (dump_file && (dump_flags & TDF_DETAILS))
3423 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3424 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3425 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3426 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3427 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3428 fprintf (dump_file, "\n");
3432 costs_initialized = true;
3437 if (SSA_VAR_P (expr))
3440 if (is_gimple_min_invariant (expr))
3442 if (TREE_CODE (expr) == INTEGER_CST)
3443 return new_cost (integer_cost [speed], 0);
3445 if (TREE_CODE (expr) == ADDR_EXPR)
3447 tree obj = TREE_OPERAND (expr, 0);
3449 if (TREE_CODE (obj) == VAR_DECL
3450 || TREE_CODE (obj) == PARM_DECL
3451 || TREE_CODE (obj) == RESULT_DECL)
3452 return new_cost (symbol_cost [speed], 0);
3455 return new_cost (address_cost [speed], 0);
3458 switch (TREE_CODE (expr))
3460 case POINTER_PLUS_EXPR:
3464 op0 = TREE_OPERAND (expr, 0);
3465 op1 = TREE_OPERAND (expr, 1);
3469 if (is_gimple_val (op0))
3472 cost0 = force_expr_to_var_cost (op0, speed);
3474 if (is_gimple_val (op1))
3477 cost1 = force_expr_to_var_cost (op1, speed);
3482 op0 = TREE_OPERAND (expr, 0);
3486 if (is_gimple_val (op0))
3489 cost0 = force_expr_to_var_cost (op0, speed);
3495 /* Just an arbitrary value, FIXME. */
3496 return new_cost (target_spill_cost[speed], 0);
3499 mode = TYPE_MODE (TREE_TYPE (expr));
3500 switch (TREE_CODE (expr))
3502 case POINTER_PLUS_EXPR:
3506 cost = new_cost (add_cost (mode, speed), 0);
3510 if (cst_and_fits_in_hwi (op0))
3511 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3512 else if (cst_and_fits_in_hwi (op1))
3513 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3515 return new_cost (target_spill_cost [speed], 0);
3522 cost = add_costs (cost, cost0);
3523 cost = add_costs (cost, cost1);
3525 /* Bound the cost by target_spill_cost. The parts of complicated
3526 computations are often either loop invariant or can at least
3527 be shared between several iv uses, so letting this grow without
3528 limits would not give reasonable results. */
3529 if (cost.cost > (int) target_spill_cost [speed])
3530 cost.cost = target_spill_cost [speed];
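/* As a rough illustration (hypothetical operands): forcing a + 4 * b into a
   variable is costed as add_cost (mode) + multiply_by_cost (4, mode) plus
   the costs of the operands themselves, and the total is capped at
   target_spill_cost so that huge invariant expressions do not dominate the
   decision.  */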
3535 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3536 invariants the computation depends on. */
3539 force_var_cost (struct ivopts_data *data,
3540 tree expr, bitmap *depends_on)
3544 fd_ivopts_data = data;
3545 walk_tree (&expr, find_depends, depends_on, NULL);
3548 return force_expr_to_var_cost (expr, data->speed);
3551 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3552 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3553 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3554 invariants the computation depends on. */
3557 split_address_cost (struct ivopts_data *data,
3558 tree addr, bool *symbol_present, bool *var_present,
3559 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3562 HOST_WIDE_INT bitsize;
3563 HOST_WIDE_INT bitpos;
3565 enum machine_mode mode;
3566 int unsignedp, volatilep;
3568 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3569 &unsignedp, &volatilep, false);
3572 || bitpos % BITS_PER_UNIT != 0
3573 || TREE_CODE (core) != VAR_DECL)
3575 *symbol_present = false;
3576 *var_present = true;
3577 fd_ivopts_data = data;
3578 walk_tree (&addr, find_depends, depends_on, NULL);
3579 return new_cost (target_spill_cost[data->speed], 0);
3582 *offset += bitpos / BITS_PER_UNIT;
3583 if (TREE_STATIC (core)
3584 || DECL_EXTERNAL (core))
3586 *symbol_present = true;
3587 *var_present = false;
3591 *symbol_present = false;
3592 *var_present = true;
3596 /* Estimates cost of expressing difference of addresses E1 - E2 as
3597 var + symbol + offset. The value of offset is added to OFFSET,
3598 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3599 part is missing. DEPENDS_ON is a set of the invariants the computation
3603 ptr_difference_cost (struct ivopts_data *data,
3604 tree e1, tree e2, bool *symbol_present, bool *var_present,
3605 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3607 HOST_WIDE_INT diff = 0;
3608 aff_tree aff_e1, aff_e2;
3611 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3613 if (ptr_difference_const (e1, e2, &diff))
3616 *symbol_present = false;
3617 *var_present = false;
3621 if (integer_zerop (e2))
3622 return split_address_cost (data, TREE_OPERAND (e1, 0),
3623 symbol_present, var_present, offset, depends_on);
3625 *symbol_present = false;
3626 *var_present = true;
3628 type = signed_type_for (TREE_TYPE (e1));
3629 tree_to_aff_combination (e1, type, &aff_e1);
3630 tree_to_aff_combination (e2, type, &aff_e2);
3631 aff_combination_scale (&aff_e2, double_int_minus_one);
3632 aff_combination_add (&aff_e1, &aff_e2);
3634 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3637 /* Estimates cost of expressing difference E1 - E2 as
3638 var + symbol + offset. The value of offset is added to OFFSET,
3639 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3640 part is missing. DEPENDS_ON is a set of the invariants the computation
3644 difference_cost (struct ivopts_data *data,
3645 tree e1, tree e2, bool *symbol_present, bool *var_present,
3646 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3648 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3649 unsigned HOST_WIDE_INT off1, off2;
3650 aff_tree aff_e1, aff_e2;
3653 e1 = strip_offset (e1, &off1);
3654 e2 = strip_offset (e2, &off2);
3655 *offset += off1 - off2;
3660 if (TREE_CODE (e1) == ADDR_EXPR)
3661 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3662 offset, depends_on);
3663 *symbol_present = false;
3665 if (operand_equal_p (e1, e2, 0))
3667 *var_present = false;
3671 *var_present = true;
3673 if (integer_zerop (e2))
3674 return force_var_cost (data, e1, depends_on);
3676 if (integer_zerop (e1))
3678 comp_cost cost = force_var_cost (data, e2, depends_on);
3679 cost.cost += multiply_by_cost (-1, mode, data->speed);
3683 type = signed_type_for (TREE_TYPE (e1));
3684 tree_to_aff_combination (e1, type, &aff_e1);
3685 tree_to_aff_combination (e2, type, &aff_e2);
3686 aff_combination_scale (&aff_e2, double_int_minus_one);
3687 aff_combination_add (&aff_e1, &aff_e2);
3689 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
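/* Example (made-up addresses): for E1 = p + 16 and E2 = p, strip_offset
   folds the difference into *OFFSET (16), the remaining operands compare
   equal, and the result has no symbol or variable part; only when the
   stripped operands still differ do we fall back to forcing the affine
   difference into a variable.  */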
3692 /* Determines the cost of the computation by which USE is expressed
3693 from induction variable CAND. If ADDRESS_P is true, we just need
3694 to create an address from it, otherwise we want to get it into a
3695 register. A set of invariants we depend on is stored in
3696 DEPENDS_ON. AT is the statement at which the value is computed.
3697 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3698 addressing is likely. */
3701 get_computation_cost_at (struct ivopts_data *data,
3702 struct iv_use *use, struct iv_cand *cand,
3703 bool address_p, bitmap *depends_on, gimple at,
3706 tree ubase = use->iv->base, ustep = use->iv->step;
3708 tree utype = TREE_TYPE (ubase), ctype;
3709 unsigned HOST_WIDE_INT cstepi, offset = 0;
3710 HOST_WIDE_INT ratio, aratio;
3711 bool var_present, symbol_present, stmt_is_after_inc;
3714 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3718 /* Only consider real candidates. */
3720 return infinite_cost;
3722 cbase = cand->iv->base;
3723 cstep = cand->iv->step;
3724 ctype = TREE_TYPE (cbase);
3726 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3728 /* We do not have enough precision to express the values of USE. */
3729 return infinite_cost;
3734 /* Do not try to express the address of an object with a computation based
3735 on the address of a different object. This may cause problems in RTL-level
3736 alias analysis (which does not expect this to happen,
3737 as it is illegal in C), and would be unlikely to be useful anyway. */
3739 if (use->iv->base_object
3740 && cand->iv->base_object
3741 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3742 return infinite_cost;
3745 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3747 /* TODO -- add direct handling of this case. */
3751 /* CSTEPI is removed from the offset in case the statement is after the
3752 increment. If the step is not constant, we use zero instead.
3753 This is a bit imprecise (there is the extra addition), but
3754 redundancy elimination is likely to transform the code so that
3755 it uses the value of the variable before the increment anyway,
3756 so this is not all that unrealistic. */
3757 if (cst_and_fits_in_hwi (cstep))
3758 cstepi = int_cst_value (cstep);
3762 if (!constant_multiple_of (ustep, cstep, &rat))
3763 return infinite_cost;
3765 if (double_int_fits_in_shwi_p (rat))
3766 ratio = double_int_to_shwi (rat);
3768 return infinite_cost;
3771 ctype = TREE_TYPE (cbase);
3773 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3774 or ratio == 1, it is better to handle this like
3776 ubase - ratio * cbase + ratio * var
3778 (this also holds in the case ratio == -1, TODO). */
3780 if (cst_and_fits_in_hwi (cbase))
3782 offset = - ratio * int_cst_value (cbase);
3783 cost = difference_cost (data,
3784 ubase, build_int_cst (utype, 0),
3785 &symbol_present, &var_present, &offset,
3788 else if (ratio == 1)
3790 cost = difference_cost (data,
3792 &symbol_present, &var_present, &offset,
3796 && !POINTER_TYPE_P (ctype)
3797 && multiplier_allowed_in_address_p
3798 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3799 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3802 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3803 cost = difference_cost (data,
3805 &symbol_present, &var_present, &offset,
3810 cost = force_var_cost (data, cbase, depends_on);
3811 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3812 cost = add_costs (cost,
3813 difference_cost (data,
3814 ubase, build_int_cst (utype, 0),
3815 &symbol_present, &var_present,
3816 &offset, depends_on));
3819 /* If we are after the increment, the value of the candidate is higher by one iteration. */
3821 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3822 if (stmt_is_after_inc)
3823 offset -= ratio * cstepi;
3825 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3826 (symbol/var1/const parts may be omitted). If we are looking for an
3827 address, find the cost of addressing this. */
3829 return add_costs (cost,
3830 get_address_cost (symbol_present, var_present,
3831 offset, ratio, cstepi,
3832 TYPE_MODE (TREE_TYPE (utype)),
3833 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3834 speed, stmt_is_after_inc,
3837 /* Otherwise estimate the costs for computing the expression. */
3838 if (!symbol_present && !var_present && !offset)
3841 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3845 /* Symbol + offset should be compile-time computable, so consider that they
3846 are added to the variable only once, if present. */
3847 if (var_present && (symbol_present || offset))
3848 cost.cost += add_cost (TYPE_MODE (ctype), speed)
3849 / AVG_LOOP_NITER (data->current_loop);
3851 /* Having an offset does not affect the runtime cost if it is added to the
3852 symbol, but it increases complexity. */
3856 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3858 aratio = ratio > 0 ? ratio : -ratio;
3860 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3864 *can_autoinc = false;
3867 /* Just get the expression, expand it and measure the cost. */
3868 tree comp = get_computation_at (data->current_loop, use, cand, at);
3871 return infinite_cost;
3874 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3876 return new_cost (computation_cost (comp, speed), 0);
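/* Putting the pieces together on a made-up example: expressing the address
   use a[i] (base &a[0], step 4) from the candidate i (base 0, step 1) gives
   ratio == 4 and a constant cbase, so the cost reduces to difference_cost
   of the bases plus get_address_cost for an address of the shape
   symbol + ratio * index.  */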
3880 /* Determines the cost of the computation by which USE is expressed
3881 from induction variable CAND. If ADDRESS_P is true, we just need
3882 to create an address from it, otherwise we want to get it into a
3883 register. A set of invariants we depend on is stored in
3884 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3885 autoinc addressing is likely. */
3888 get_computation_cost (struct ivopts_data *data,
3889 struct iv_use *use, struct iv_cand *cand,
3890 bool address_p, bitmap *depends_on, bool *can_autoinc)
3892 return get_computation_cost_at (data,
3893 use, cand, address_p, depends_on, use->stmt,
3897 /* Determines cost of basing replacement of USE on CAND in a generic expression. */
3901 determine_use_iv_cost_generic (struct ivopts_data *data,
3902 struct iv_use *use, struct iv_cand *cand)
3907 /* The simple case first -- if we need to express the value of the preserved
3908 original biv, the cost is 0. This also prevents us from counting the
3909 cost of the increment twice -- once at this use and once in the cost of the candidate. */
3911 if (cand->pos == IP_ORIGINAL
3912 && cand->incremented_at == use->stmt)
3914 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3918 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3919 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3921 return !infinite_cost_p (cost);
3924 /* Determines cost of basing replacement of USE on CAND in an address. */
3927 determine_use_iv_cost_address (struct ivopts_data *data,
3928 struct iv_use *use, struct iv_cand *cand)
3932 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3935 if (cand->ainc_use == use)
3938 cost.cost -= cand->cost_step;
3939 /* If we generated the candidate solely for exploiting autoincrement
3940 opportunities, and it turns out it can't be used, set the cost to
3941 infinity to make sure we ignore it. */
3942 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3943 cost = infinite_cost;
3945 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3947 return !infinite_cost_p (cost);
3950 /* Computes value of candidate CAND at position AT in iteration NITER, and
3951 stores it to VAL. */
3954 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3957 aff_tree step, delta, nit;
3958 struct iv *iv = cand->iv;
3959 tree type = TREE_TYPE (iv->base);
3960 tree steptype = type;
3961 if (POINTER_TYPE_P (type))
3962 steptype = sizetype;
3964 tree_to_aff_combination (iv->step, steptype, &step);
3965 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3966 aff_combination_convert (&nit, steptype);
3967 aff_combination_mult (&nit, &step, &delta);
3968 if (stmt_after_increment (loop, cand, at))
3969 aff_combination_add (&delta, &step);
3971 tree_to_aff_combination (iv->base, type, val);
3972 aff_combination_add (val, &delta);
3975 /* Returns period of induction variable iv. */
3978 iv_period (struct iv *iv)
3980 tree step = iv->step, period, type;
3983 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3985 /* The period of the iv is gcd (step, type range). Since the type range is a power
3986 of two, it suffices to determine the maximum power of two that divides the step. */
3988 pow2div = num_ending_zeros (step);
3989 type = unsigned_type_for (TREE_TYPE (step));
3991 period = build_low_bits_mask (type,
3992 (TYPE_PRECISION (type)
3993 - tree_low_cst (pow2div, 1)));
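/* For example, a 16-bit unsigned iv with step 4 has num_ending_zeros == 2,
   so the period computed here is a mask of the low 14 bits, i.e. 0x3fff;
   the iv wraps around only after 0x4000 increments, so it can drive an exit
   test for any loop iterating fewer times than that period.  */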
3998 /* Returns the comparison operator used when eliminating the iv USE. */
4000 static enum tree_code
4001 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4003 struct loop *loop = data->current_loop;
4007 ex_bb = gimple_bb (use->stmt);
4008 exit = EDGE_SUCC (ex_bb, 0);
4009 if (flow_bb_inside_loop_p (loop, exit->dest))
4010 exit = EDGE_SUCC (ex_bb, 1);
4012 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4015 /* Check whether it is possible to express the condition in USE by a comparison
4016 of candidate CAND. If so, store the value to compare against in BOUND. */
4019 may_eliminate_iv (struct ivopts_data *data,
4020 struct iv_use *use, struct iv_cand *cand, tree *bound)
4025 struct loop *loop = data->current_loop;
4028 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4031 /* For now this works only for exits that dominate the loop latch.
4032 TODO: extend to other conditions inside the loop body. */
4033 ex_bb = gimple_bb (use->stmt);
4034 if (use->stmt != last_stmt (ex_bb)
4035 || gimple_code (use->stmt) != GIMPLE_COND
4036 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4039 exit = EDGE_SUCC (ex_bb, 0);
4040 if (flow_bb_inside_loop_p (loop, exit->dest))
4041 exit = EDGE_SUCC (ex_bb, 1);
4042 if (flow_bb_inside_loop_p (loop, exit->dest))
4045 nit = niter_for_exit (data, exit);
4049 /* Determine whether we can use the variable to test the exit condition.
4050 This is the case iff the period of the induction variable is greater
4051 than the number of iterations for which the exit condition is true. */
4052 period = iv_period (cand->iv);
4054 /* If the number of iterations is constant, compare against it directly. */
4055 if (TREE_CODE (nit) == INTEGER_CST)
4057 if (!tree_int_cst_lt (nit, period))
4061 /* If not, and if this is the only possible exit of the loop, see whether
4062 we can get a conservative estimate on the number of iterations of the
4063 entire loop and compare against that instead. */
4064 else if (loop_only_exit_p (loop, exit))
4066 double_int period_value, max_niter;
4067 if (!estimated_loop_iterations (loop, true, &max_niter))
4069 period_value = tree_to_double_int (period);
4070 if (double_int_ucmp (max_niter, period_value) >= 0)
4074 /* Otherwise, punt. */
4078 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4080 *bound = aff_combination_to_tree (&bnd);
4081 /* It is unlikely that computing the number of iterations using division
4082 would be more profitable than keeping the original induction variable. */
4083 if (expression_expensive_p (*bound))
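/* A sketch of the transformation this analysis enables (purely illustrative
   loop): for "for (i = 0; i < n; i++) { ... p += 4; }" the exit test i < n
   can be replaced by a test of the candidate against the bound computed by
   cand_value_at, e.g. p != base + 4 * n, provided the candidate's period
   exceeds the number of iterations.  */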
4088 /* Determines cost of basing replacement of USE on CAND in a condition. */
4091 determine_use_iv_cost_condition (struct ivopts_data *data,
4092 struct iv_use *use, struct iv_cand *cand)
4094 tree bound = NULL_TREE;
4096 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4097 comp_cost elim_cost, express_cost, cost;
4099 tree *control_var, *bound_cst;
4101 /* Only consider real candidates. */
4104 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4108 /* Try iv elimination. */
4109 if (may_eliminate_iv (data, use, cand, &bound))
4111 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4112 /* The bound is a loop invariant, so it will be computed only once. */
4114 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
4117 elim_cost = infinite_cost;
4119 /* Try expressing the original giv. If it is compared with an invariant,
4120 note that we cannot get rid of it. */
4121 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4125 /* When the condition is a comparison of the candidate IV against
4126 zero, prefer this IV.
4128 TODO: The constant that we're subtracting from the cost should
4129 be target-dependent. This information should be added to the
4130 target costs for each backend. */
4131 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4132 && integer_zerop (*bound_cst)
4133 && (operand_equal_p (*control_var, cand->var_after, 0)
4134 || operand_equal_p (*control_var, cand->var_before, 0)))
4135 elim_cost.cost -= 1;
4137 express_cost = get_computation_cost (data, use, cand, false,
4138 &depends_on_express, NULL);
4139 fd_ivopts_data = data;
4140 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4142 /* Choose the better approach, preferring the eliminated IV. */
4143 if (compare_costs (elim_cost, express_cost) <= 0)
4146 depends_on = depends_on_elim;
4147 depends_on_elim = NULL;
4151 cost = express_cost;
4152 depends_on = depends_on_express;
4153 depends_on_express = NULL;
4157 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4159 if (depends_on_elim)
4160 BITMAP_FREE (depends_on_elim);
4161 if (depends_on_express)
4162 BITMAP_FREE (depends_on_express);
4164 return !infinite_cost_p (cost);
4167 /* Determines cost of basing replacement of USE on CAND. Returns false
4168 if USE cannot be based on CAND. */
4171 determine_use_iv_cost (struct ivopts_data *data,
4172 struct iv_use *use, struct iv_cand *cand)
4176 case USE_NONLINEAR_EXPR:
4177 return determine_use_iv_cost_generic (data, use, cand);
4180 return determine_use_iv_cost_address (data, use, cand);
4183 return determine_use_iv_cost_condition (data, use, cand);
4190 /* Return true if get_computation_cost indicates that autoincrement is
4191 a possibility for the pair of USE and CAND, false otherwise. */
4194 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4195 struct iv_cand *cand)
4201 if (use->type != USE_ADDRESS)
4204 cost = get_computation_cost (data, use, cand, true, &depends_on,
4207 BITMAP_FREE (depends_on);
4209 return !infinite_cost_p (cost) && can_autoinc;
4212 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4213 use that allows autoincrement, and set their AINC_USE if possible. */
4216 set_autoinc_for_original_candidates (struct ivopts_data *data)
4220 for (i = 0; i < n_iv_cands (data); i++)
4222 struct iv_cand *cand = iv_cand (data, i);
4223 struct iv_use *closest = NULL;
4224 if (cand->pos != IP_ORIGINAL)
4226 for (j = 0; j < n_iv_uses (data); j++)
4228 struct iv_use *use = iv_use (data, j);
4229 unsigned uid = gimple_uid (use->stmt);
4230 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4231 || uid > gimple_uid (cand->incremented_at))
4233 if (closest == NULL || uid > gimple_uid (closest->stmt))
4236 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4238 cand->ainc_use = closest;
4242 /* Finds the candidates for the induction variables. */
4245 find_iv_candidates (struct ivopts_data *data)
4247 /* Add commonly used ivs. */
4248 add_standard_iv_candidates (data);
4250 /* Add old induction variables. */
4251 add_old_ivs_candidates (data);
4253 /* Add induction variables derived from uses. */
4254 add_derived_ivs_candidates (data);
4256 set_autoinc_for_original_candidates (data);
4258 /* Record the important candidates. */
4259 record_important_candidates (data);
4262 /* Determines costs of basing the use of the iv on an iv candidate. */
4265 determine_use_iv_costs (struct ivopts_data *data)
4269 struct iv_cand *cand;
4270 bitmap to_clear = BITMAP_ALLOC (NULL);
4272 alloc_use_cost_map (data);
4274 for (i = 0; i < n_iv_uses (data); i++)
4276 use = iv_use (data, i);
4278 if (data->consider_all_candidates)
4280 for (j = 0; j < n_iv_cands (data); j++)
4282 cand = iv_cand (data, j);
4283 determine_use_iv_cost (data, use, cand);
4290 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4292 cand = iv_cand (data, j);
4293 if (!determine_use_iv_cost (data, use, cand))
4294 bitmap_set_bit (to_clear, j);
4297 /* Remove the candidates for which the cost is infinite from
4298 the list of related candidates. */
4299 bitmap_and_compl_into (use->related_cands, to_clear);
4300 bitmap_clear (to_clear);
4304 BITMAP_FREE (to_clear);
4306 if (dump_file && (dump_flags & TDF_DETAILS))
4308 fprintf (dump_file, "Use-candidate costs:\n");
4310 for (i = 0; i < n_iv_uses (data); i++)
4312 use = iv_use (data, i);
4314 fprintf (dump_file, "Use %d:\n", i);
4315 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4316 for (j = 0; j < use->n_map_members; j++)
4318 if (!use->cost_map[j].cand
4319 || infinite_cost_p (use->cost_map[j].cost))
4322 fprintf (dump_file, " %d\t%d\t%d\t",
4323 use->cost_map[j].cand->id,
4324 use->cost_map[j].cost.cost,
4325 use->cost_map[j].cost.complexity);
4326 if (use->cost_map[j].depends_on)
4327 bitmap_print (dump_file,
4328 use->cost_map[j].depends_on, "","");
4329 fprintf (dump_file, "\n");
4332 fprintf (dump_file, "\n");
4334 fprintf (dump_file, "\n");
4338 /* Determines cost of the candidate CAND. */
4341 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4343 comp_cost cost_base;
4344 unsigned cost, cost_step;
4353 /* There are two costs associated with the candidate -- its increment
4354 and its initialization. The second is almost negligible for any loop
4355 that rolls enough, so we take it into account only very little. */
4357 base = cand->iv->base;
4358 cost_base = force_var_cost (data, base, NULL);
4359 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4361 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4363 /* Prefer the original ivs unless we may gain something by replacing them;
4364 the reason is to make debugging simpler. This is not relevant for
4365 artificial ivs created by other optimization passes. */
4366 if (cand->pos != IP_ORIGINAL
4367 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4370 /* Prefer not to insert statements into the latch unless there are some
4371 already (so that we do not create unnecessary jumps). */
4372 if (cand->pos == IP_END
4373 && empty_block_p (ip_end_pos (data->current_loop)))
4377 cand->cost_step = cost_step;
4380 /* Determines costs of computation of the candidates. */
4383 determine_iv_costs (struct ivopts_data *data)
4387 if (dump_file && (dump_flags & TDF_DETAILS))
4389 fprintf (dump_file, "Candidate costs:\n");
4390 fprintf (dump_file, " cand\tcost\n");
4393 for (i = 0; i < n_iv_cands (data); i++)
4395 struct iv_cand *cand = iv_cand (data, i);
4397 determine_iv_cost (data, cand);
4399 if (dump_file && (dump_flags & TDF_DETAILS))
4400 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4403 if (dump_file && (dump_flags & TDF_DETAILS))
4404 fprintf (dump_file, "\n");
4407 /* Calculates cost for having SIZE induction variables. */
4410 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4412 /* We add size to the cost, so that we prefer eliminating ivs if possible. */
4414 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4417 /* For each size of the induction variable set determine the penalty. */
4420 determine_set_costs (struct ivopts_data *data)
4424 gimple_stmt_iterator psi;
4426 struct loop *loop = data->current_loop;
4429 /* We use the following model (definitely improvable, especially the
4430 cost function -- TODO):
4432 We estimate the number of registers available (using MD data), name it A.
4434 We estimate the number of registers used by the loop, name it U. This
4435 number is obtained as the number of loop phi nodes (not counting virtual
4436 registers and bivs) + the number of variables from outside of the loop.
4438 We set a reserve R (free regs that are used for temporary computations,
4439 etc.). For now the reserve is a constant 3.
4441 Let I be the number of induction variables.
4443 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to encourage
4444 making a lot of ivs without a reason).
4445 -- if A - R < U + I <= A, the cost is I * PRES_COST
4446 -- if U + I > A, the cost is I * PRES_COST and
4447 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
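/* A worked instance of the model with made-up numbers: A = 16, R = 3,
   U = 10.  Then I = 2 ivs give U + I + R = 15 <= A (the cheap case), I = 5
   give A - R < 15 <= A (charged at PRES_COST per iv), and I = 8 make
   U + I exceed A, so the spill term kicks in on top of the PRES_COST
   charge.  */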
4449 if (dump_file && (dump_flags & TDF_DETAILS))
4451 fprintf (dump_file, "Global costs:\n");
4452 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4453 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4454 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4458 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4460 phi = gsi_stmt (psi);
4461 op = PHI_RESULT (phi);
4463 if (!is_gimple_reg (op))
4466 if (get_iv (data, op))
4472 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4474 struct version_info *info = ver_info (data, j);
4476 if (info->inv_id && info->has_nonlin_use)
4480 data->regs_used = n;
4481 if (dump_file && (dump_flags & TDF_DETAILS))
4482 fprintf (dump_file, " regs_used %d\n", n);
4484 if (dump_file && (dump_flags & TDF_DETAILS))
4486 fprintf (dump_file, " cost for size:\n");
4487 fprintf (dump_file, " ivs\tcost\n");
4488 for (j = 0; j <= 2 * target_avail_regs; j++)
4489 fprintf (dump_file, " %d\t%d\n", j,
4490 ivopts_global_cost_for_size (data, j));
4491 fprintf (dump_file, "\n");
4495 /* Returns true if A is a cheaper cost pair than B. */
4498 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4508 cmp = compare_costs (a->cost, b->cost);
4515 /* In case the costs are the same, prefer the cheaper candidate. */
4516 if (a->cand->cost < b->cand->cost)
4522 /* Computes the cost field of IVS structure. */
4525 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4527 comp_cost cost = ivs->cand_use_cost;
4528 cost.cost += ivs->cand_cost;
4529 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4534 /* Remove the invariants in set INVS from set IVS. */
4537 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4545 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4547 ivs->n_invariant_uses[iid]--;
4548 if (ivs->n_invariant_uses[iid] == 0)
4553 /* Set USE not to be expressed by any candidate in IVS. */
4556 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4559 unsigned uid = use->id, cid;
4560 struct cost_pair *cp;
4562 cp = ivs->cand_for_use[uid];
4568 ivs->cand_for_use[uid] = NULL;
4569 ivs->n_cand_uses[cid]--;
4571 if (ivs->n_cand_uses[cid] == 0)
4573 bitmap_clear_bit (ivs->cands, cid);
4574 /* Do not count the pseudocandidates. */
4578 ivs->cand_cost -= cp->cand->cost;
4580 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4583 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4585 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4586 iv_ca_recount_cost (data, ivs);
4589 /* Add invariants in set INVS to set IVS. */
4592 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4600 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4602 ivs->n_invariant_uses[iid]++;
4603 if (ivs->n_invariant_uses[iid] == 1)
4608 /* Set cost pair for USE in set IVS to CP. */
4611 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4612 struct iv_use *use, struct cost_pair *cp)
4614 unsigned uid = use->id, cid;
4616 if (ivs->cand_for_use[uid] == cp)
4619 if (ivs->cand_for_use[uid])
4620 iv_ca_set_no_cp (data, ivs, use);
4627 ivs->cand_for_use[uid] = cp;
4628 ivs->n_cand_uses[cid]++;
4629 if (ivs->n_cand_uses[cid] == 1)
4631 bitmap_set_bit (ivs->cands, cid);
4632 /* Do not count the pseudocandidates. */
4636 ivs->cand_cost += cp->cand->cost;
4638 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4641 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4642 iv_ca_set_add_invariants (ivs, cp->depends_on);
4643 iv_ca_recount_cost (data, ivs);
4647 /* Extend set IVS by expressing USE by some of the candidates in it
4651 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4654 struct cost_pair *best_cp = NULL, *cp;
4658 gcc_assert (ivs->upto >= use->id);
4660 if (ivs->upto == use->id)
4666 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4668 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4670 if (cheaper_cost_pair (cp, best_cp))
4674 iv_ca_set_cp (data, ivs, use, best_cp);
4677 /* Get cost for assignment IVS. */
4680 iv_ca_cost (struct iv_ca *ivs)
4682 /* This was a conditional expression but it triggered a bug in
4685 return infinite_cost;
4690 /* Returns true if all dependences of CP are among invariants in IVS. */
4693 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4698 if (!cp->depends_on)
4701 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4703 if (ivs->n_invariant_uses[i] == 0)
4710 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
4711 it before NEXT_CHANGE. */
4713 static struct iv_ca_delta *
4714 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4715 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4717 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4720 change->old_cp = old_cp;
4721 change->new_cp = new_cp;
4722 change->next_change = next_change;
4727 /* Joins two lists of changes L1 and L2. Destructive -- the old lists are unusable afterwards. */
4730 static struct iv_ca_delta *
4731 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4733 struct iv_ca_delta *last;
4741 for (last = l1; last->next_change; last = last->next_change)
4743 last->next_change = l2;
4748 /* Returns the cost pair by which USE is expressed in IVS. */
4750 static struct cost_pair *
4751 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4753 return ivs->cand_for_use[use->id];
4756 /* Reverse the list of changes DELTA, forming the inverse to it. */
4758 static struct iv_ca_delta *
4759 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4761 struct iv_ca_delta *act, *next, *prev = NULL;
4762 struct cost_pair *tmp;
4764 for (act = delta; act; act = next)
4766 next = act->next_change;
4767 act->next_change = prev;
4771 act->old_cp = act->new_cp;
4778 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4779 reverted instead. */
4782 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4783 struct iv_ca_delta *delta, bool forward)
4785 struct cost_pair *from, *to;
4786 struct iv_ca_delta *act;
4789 delta = iv_ca_delta_reverse (delta);
4791 for (act = delta; act; act = act->next_change)
4795 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4796 iv_ca_set_cp (data, ivs, act->use, to);
4800 iv_ca_delta_reverse (delta);
4803 /* Returns true if CAND is used in IVS. */
4806 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4808 return ivs->n_cand_uses[cand->id] > 0;
4811 /* Returns number of induction variable candidates in the set IVS. */
4814 iv_ca_n_cands (struct iv_ca *ivs)
4816 return ivs->n_cands;
4819 /* Free the list of changes DELTA. */
4822 iv_ca_delta_free (struct iv_ca_delta **delta)
4824 struct iv_ca_delta *act, *next;
4826 for (act = *delta; act; act = next)
4828 next = act->next_change;
4835 /* Allocates a new iv candidate assignment. */
4837 static struct iv_ca *
4838 iv_ca_new (struct ivopts_data *data)
4840 struct iv_ca *nw = XNEW (struct iv_ca);
4844 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4845 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4846 nw->cands = BITMAP_ALLOC (NULL);
4849 nw->cand_use_cost = zero_cost;
4851 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4852 nw->cost = zero_cost;
4857 /* Free memory occupied by the set IVS. */
4860 iv_ca_free (struct iv_ca **ivs)
4862 free ((*ivs)->cand_for_use);
4863 free ((*ivs)->n_cand_uses);
4864 BITMAP_FREE ((*ivs)->cands);
4865 free ((*ivs)->n_invariant_uses);
4870 /* Dumps IVS to FILE. */
4873 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4875 const char *pref = " invariants ";
4877 comp_cost cost = iv_ca_cost (ivs);
4879 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4880 bitmap_print (file, ivs->cands, " candidates ","\n");
4882 for (i = 1; i <= data->max_inv_id; i++)
4883 if (ivs->n_invariant_uses[i])
4885 fprintf (file, "%s%d", pref, i);
4888 fprintf (file, "\n");
4891 /* Try changing the candidate in IVS to CAND for each use. Return the cost of the
4892 new set, and store the differences in DELTA. The number of induction variables
4893 in the new set is stored in N_IVS. */
4896 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4897 struct iv_cand *cand, struct iv_ca_delta **delta,
4903 struct cost_pair *old_cp, *new_cp;
4906 for (i = 0; i < ivs->upto; i++)
4908 use = iv_use (data, i);
4909 old_cp = iv_ca_cand_for_use (ivs, use);
4912 && old_cp->cand == cand)
4915 new_cp = get_use_iv_cost (data, use, cand);
4919 if (!iv_ca_has_deps (ivs, new_cp))
4922 if (!cheaper_cost_pair (new_cp, old_cp))
4925 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4928 iv_ca_delta_commit (data, ivs, *delta, true);
4929 cost = iv_ca_cost (ivs);
4931 *n_ivs = iv_ca_n_cands (ivs);
4932 iv_ca_delta_commit (data, ivs, *delta, false);
4937 /* Try narrowing set IVS by removing CAND. Return the cost of
4938 the new set and store the differences in DELTA. */
static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates other than
   EXCEPT_CAND from it.  Return cost of the new set, and store the
   differences in DELTA.  */
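/* Removing one candidate may make further candidates removable, so the
   pruning recurses on the narrowed set until no single removal improves
   the cost any more.  */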
static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  */
static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
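  /* For instance, in a loop such as

	 for (i = 0; i < n; i++)
	   {
	     a[i] = ...;
	     b[i] = ...;
	     c[i] = ...;
	   }

     a single counter "i" (a generic biv) usually serves all three address
     uses better than three separate pointer ivs, one per array, would.  */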
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important && cand->iv->base_object == NULL_TREE)
            continue;

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;
              iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}
/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */
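/* In outline, the search below proceeds roughly as follows (a sketch, not
   the exact control flow):

       set = initial solution (one candidate chosen per use);
       while (some single extension or pruning of SET lowers its cost)
	 apply the best such change;

   so the result is locally optimal with respect to single-candidate
   changes, not necessarily globally optimal.  */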
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      comp_cost cost = iv_ca_cost (set);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
    }

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */
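/* The position of the increment depends on CAND->pos: just before the exit
   test for IP_NORMAL, at the end of the latch block for IP_END, next to the
   statement that uses it for IP_BEFORE_USE/IP_AFTER_USE; for IP_ORIGINAL
   the pre-existing biv is kept and only marked as preserved.  */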
static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */
static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
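  /* E.g. when the original biv is "i" with increment "i.1 = i + 1" and the
     chosen candidate is that very biv, the increment statement can usually
     be left exactly as it is.  */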
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
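      /* For instance "i.1 = i + 1" may stay as it is, whereas "i.1 = i + j"
         must be rewritten if "j" is itself derived from another iv that
         could be removed later.  */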
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
          || old_code == MINUS_EXPR
          || old_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (old_code != MINUS_EXPR
                   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
                                 true, GSI_SAME_STMT);

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, op);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, op);
      use->stmt = gsi_stmt (bsi);
    }
}
/* Replaces ssa name in index IDX by its basic variable.  Callback for
   for_each_index.  */

static bool
idx_remove_ssa_names (tree base, tree *idx,
                      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
    }

  return true;
}
/* Unshares REF and replaces ssa names inside it by their basic variables.  */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}
/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
      TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
      TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
    }
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref;
  bool ok;

  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
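  /* For example, when the reference is "a[i]" and the candidate iterates
     directly over the address of "a", the candidate's variable is passed as
     BASE_HINT, so that create_mem_ref can prefer it as the base of the
     resulting TARGET_MEM_REF rather than treating it as a plain offset.  */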
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
                        data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */
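/* When the candidate can eliminate the original iv from the exit test, the
   comparison is rewritten to check the candidate against a precomputed
   bound; e.g. a test like "i < 100" may become "p != base + 400" when "p"
   advances by 4 each iteration (an illustration only -- the bound actually
   used comes from the cost pair computed earlier).  */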
static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     iv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;
    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;
    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;
    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}
/* Finalizes the data structures used by the induction variable
   optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}