gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
   3    Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "pointer-set.h"
  87 #include "hashtab.h"
  88 #include "tree-chrec.h"
  89 #include "tree-scalar-evolution.h"
  90 #include "cfgloop.h"
  91 #include "params.h"
  92 #include "langhooks.h"
  93 #include "tree-affine.h"
  94 #include "target.h"
  95
  96 /* The infinite cost.  */
  97 #define INFTY 10000000
  98
  99 /* The expected number of loop iterations.  TODO -- use profiling instead of
 100    this.  */
 101 #define AVG_LOOP_NITER(LOOP) 5
 102
 103
 104 /* Representation of the induction variable.  */
 105 struct iv
 106 {
 107   tree base;            /* Initial value of the iv.  */
 108   tree base_object;     /* A memory object to that the induction variable points.  */
 109   tree step;            /* Step of the iv (constant only).  */
 110   tree ssa_name;        /* The ssa name with the value.  */
 111   bool biv_p;           /* Is it a biv?  */
 112   bool have_use_for;    /* Do we already have a use for it?  */
 113   unsigned use_id;      /* The identifier in the use if it is the case.  */
 114 };
 115
 116 /* Per-ssa version information (induction variable descriptions, etc.).  */
 117 struct version_info
 118 {
 119   tree name;            /* The ssa name.  */
 120   struct iv *iv;        /* Induction variable description.  */
 121   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 122                            an expression that is not an induction variable.  */
 123   unsigned inv_id;      /* Id of an invariant.  */
 124   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 125 };
 126
 127 /* Types of uses.  */
 128 enum use_type
 129 {
 130   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 131   USE_ADDRESS,          /* Use in an address.  */
 132   USE_COMPARE           /* Use is a compare.  */
 133 };
 134
 135 /* Cost of a computation.  */
 136 typedef struct
 137 {
 138   unsigned cost;        /* The runtime cost.  */
 139   unsigned complexity;  /* The estimate of the complexity of the code for
 140                            the computation (in no concrete units --
 141                            complexity field should be larger for more
 142                            complex expressions and addressing modes).  */
 143 } comp_cost;
 144
 145 static const comp_cost zero_cost = {0, 0};
 146 static const comp_cost infinite_cost = {INFTY, INFTY};
 147
 148 /* The candidate - cost pair.  */
 149 struct cost_pair
 150 {
 151   struct iv_cand *cand; /* The candidate.  */
 152   comp_cost cost;       /* The cost.  */
 153   bitmap depends_on;    /* The list of invariants that have to be
 154                            preserved.  */
 155   tree value;           /* For final value elimination, the expression for
 156                            the final value of the iv.  For iv elimination,
 157                            the new bound to compare with.  */
 158 };
 159
 160 /* Use.  */
 161 struct iv_use
 162 {
 163   unsigned id;          /* The id of the use.  */
 164   enum use_type type;   /* Type of the use.  */
 165   struct iv *iv;        /* The induction variable it is based on.  */
 166   gimple stmt;          /* Statement in that it occurs.  */
 167   tree *op_p;           /* The place where it occurs.  */
 168   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 169                            important ones.  */
 170
 171   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 172   struct cost_pair *cost_map;
 173                         /* The costs wrto the iv candidates.  */
 174
 175   struct iv_cand *selected;
 176                         /* The selected candidate.  */
 177 };
 178
 179 /* The position where the iv is computed.  */
 180 enum iv_position
 181 {
 182   IP_NORMAL,            /* At the end, just before the exit condition.  */
 183   IP_END,               /* At the end of the latch block.  */
 184   IP_ORIGINAL           /* The original biv.  */
 185 };
 186
 187 /* The induction variable candidate.  */
 188 struct iv_cand
 189 {
 190   unsigned id;          /* The number of the candidate.  */
 191   bool important;       /* Whether this is an "important" candidate, i.e. such
 192                            that it should be considered by all uses.  */
 193   enum iv_position pos; /* Where it is computed.  */
 194   gimple incremented_at;/* For original biv, the statement where it is
 195                            incremented.  */
 196   tree var_before;      /* The variable used for it before increment.  */
 197   tree var_after;       /* The variable used for it after increment.  */
 198   struct iv *iv;        /* The value of the candidate.  NULL for
 199                            "pseudocandidate" used to indicate the possibility
 200                            to replace the final value of an iv by direct
 201                            computation of the value.  */
 202   unsigned cost;        /* Cost of the candidate.  */
 203   bitmap depends_on;    /* The list of invariants that are used in step of the
 204                            biv.  */
 205 };
 206
 207 /* The data used by the induction variable optimizations.  */
 208
 209 typedef struct iv_use *iv_use_p;
 210 DEF_VEC_P(iv_use_p);
 211 DEF_VEC_ALLOC_P(iv_use_p,heap);
 212
 213 typedef struct iv_cand *iv_cand_p;
 214 DEF_VEC_P(iv_cand_p);
 215 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 216
 217 struct ivopts_data
 218 {
 219   /* The currently optimized loop.  */
 220   struct loop *current_loop;
 221
 222   /* Number of registers used in it.  */
 223   unsigned regs_used;
 224
 225   /* Numbers of iterations for all exits of the current loop.  */
 226   struct pointer_map_t *niters;
 227
 228   /* The size of version_info array allocated.  */
 229   unsigned version_info_size;
 230
 231   /* The array of information for the ssa names.  */
 232   struct version_info *version_info;
 233
 234   /* The bitmap of indices in version_info whose value was changed.  */
 235   bitmap relevant;
 236
 237   /* The maximum invariant id.  */
 238   unsigned max_inv_id;
 239
 240   /* The uses of induction variables.  */
 241   VEC(iv_use_p,heap) *iv_uses;
 242
 243   /* The candidates.  */
 244   VEC(iv_cand_p,heap) *iv_candidates;
 245
 246   /* A bitmap of important candidates.  */
 247   bitmap important_candidates;
 248
 249   /* Whether to consider just related and important candidates when replacing a
 250      use.  */
 251   bool consider_all_candidates;
 252 };
 253
 254 /* An assignment of iv candidates to uses.  */
 255
 256 struct iv_ca
 257 {
 258   /* The number of uses covered by the assignment.  */
 259   unsigned upto;
 260
 261   /* Number of uses that cannot be expressed by the candidates in the set.  */
 262   unsigned bad_uses;
 263
 264   /* Candidate assigned to a use, together with the related costs.  */
 265   struct cost_pair **cand_for_use;
 266
 267   /* Number of times each candidate is used.  */
 268   unsigned *n_cand_uses;
 269
 270   /* The candidates used.  */
 271   bitmap cands;
 272
 273   /* The number of candidates in the set.  */
 274   unsigned n_cands;
 275
 276   /* Total number of registers needed.  */
 277   unsigned n_regs;
 278
 279   /* Total cost of expressing uses.  */
 280   comp_cost cand_use_cost;
 281
 282   /* Total cost of candidates.  */
 283   unsigned cand_cost;
 284
 285   /* Number of times each invariant is used.  */
 286   unsigned *n_invariant_uses;
 287
 288   /* Total cost of the assignment.  */
 289   comp_cost cost;
 290 };
 291
 292 /* Difference of two iv candidate assignments.  */
 293
 294 struct iv_ca_delta
 295 {
 296   /* Changed use.  */
 297   struct iv_use *use;
 298
 299   /* An old assignment (for rollback purposes).  */
 300   struct cost_pair *old_cp;
 301
 302   /* A new assignment.  */
 303   struct cost_pair *new_cp;
 304
 305   /* Next change in the list.  */
 306   struct iv_ca_delta *next_change;
 307 };
 308
 309 /* Bound on number of candidates below that all candidates are considered.  */
 310
 311 #define CONSIDER_ALL_CANDIDATES_BOUND \
 312   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 313
 314 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 315    optimizing such a loop would help, and it would take ages).  */
 316
 317 #define MAX_CONSIDERED_USES \
 318   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 319
 320 /* If there are at most this number of ivs in the set, try removing unnecessary
 321    ivs from the set always.  */
 322
 323 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 324   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 325
 326 /* The list of trees for that the decl_rtl field must be reset is stored
 327    here.  */
 328
 329 static VEC(tree,heap) *decl_rtl_to_reset;
 330
 331 /* Number of uses recorded in DATA.  */
 332
 333 static inline unsigned
 334 n_iv_uses (struct ivopts_data *data)
 335 {
 336   return VEC_length (iv_use_p, data->iv_uses);
 337 }
 338
 339 /* Ith use recorded in DATA.  */
 340
 341 static inline struct iv_use *
 342 iv_use (struct ivopts_data *data, unsigned i)
 343 {
 344   return VEC_index (iv_use_p, data->iv_uses, i);
 345 }
 346
 347 /* Number of candidates recorded in DATA.  */
 348
 349 static inline unsigned
 350 n_iv_cands (struct ivopts_data *data)
 351 {
 352   return VEC_length (iv_cand_p, data->iv_candidates);
 353 }
 354
 355 /* Ith candidate recorded in DATA.  */
 356
 357 static inline struct iv_cand *
 358 iv_cand (struct ivopts_data *data, unsigned i)
 359 {
 360   return VEC_index (iv_cand_p, data->iv_candidates, i);
 361 }
 362
 363 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 364
 365 edge
 366 single_dom_exit (struct loop *loop)
 367 {
 368   edge exit = single_exit (loop);
 369
 370   if (!exit)
 371     return NULL;
 372
 373   if (!just_once_each_iteration_p (loop, exit->src))
 374     return NULL;
 375
 376   return exit;
 377 }
 378
 379 /* Dumps information about the induction variable IV to FILE.  */
 380
 381 extern void dump_iv (FILE *, struct iv *);
 382 void
 383 dump_iv (FILE *file, struct iv *iv)
 384 {
 385   if (iv->ssa_name)
 386     {
 387       fprintf (file, "ssa name ");
 388       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 389       fprintf (file, "\n");
 390     }
 391
 392   fprintf (file, "  type ");
 393   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 394   fprintf (file, "\n");
 395
 396   if (iv->step)
 397     {
 398       fprintf (file, "  base ");
 399       print_generic_expr (file, iv->base, TDF_SLIM);
 400       fprintf (file, "\n");
 401
 402       fprintf (file, "  step ");
 403       print_generic_expr (file, iv->step, TDF_SLIM);
 404       fprintf (file, "\n");
 405     }
 406   else
 407     {
 408       fprintf (file, "  invariant ");
 409       print_generic_expr (file, iv->base, TDF_SLIM);
 410       fprintf (file, "\n");
 411     }
 412
 413   if (iv->base_object)
 414     {
 415       fprintf (file, "  base object ");
 416       print_generic_expr (file, iv->base_object, TDF_SLIM);
 417       fprintf (file, "\n");
 418     }
 419
 420   if (iv->biv_p)
 421     fprintf (file, "  is a biv\n");
 422 }
 423
 424 /* Dumps information about the USE to FILE.  */
 425
 426 extern void dump_use (FILE *, struct iv_use *);
 427 void
 428 dump_use (FILE *file, struct iv_use *use)
 429 {
 430   fprintf (file, "use %d\n", use->id);
 431
 432   switch (use->type)
 433     {
 434     case USE_NONLINEAR_EXPR:
 435       fprintf (file, "  generic\n");
 436       break;
 437
 438     case USE_ADDRESS:
 439       fprintf (file, "  address\n");
 440       break;
 441
 442     case USE_COMPARE:
 443       fprintf (file, "  compare\n");
 444       break;
 445
 446     default:
 447       gcc_unreachable ();
 448     }
 449
 450   fprintf (file, "  in statement ");
 451   print_gimple_stmt (file, use->stmt, 0, 0);
 452   fprintf (file, "\n");
 453
 454   fprintf (file, "  at position ");
 455   if (use->op_p)
 456     print_generic_expr (file, *use->op_p, TDF_SLIM);
 457   fprintf (file, "\n");
 458
 459   dump_iv (file, use->iv);
 460
 461   if (use->related_cands)
 462     {
 463       fprintf (file, "  related candidates ");
 464       dump_bitmap (file, use->related_cands);
 465     }
 466 }
 467
 468 /* Dumps information about the uses to FILE.  */
 469
 470 extern void dump_uses (FILE *, struct ivopts_data *);
 471 void
 472 dump_uses (FILE *file, struct ivopts_data *data)
 473 {
 474   unsigned i;
 475   struct iv_use *use;
 476
 477   for (i = 0; i < n_iv_uses (data); i++)
 478     {
 479       use = iv_use (data, i);
 480
 481       dump_use (file, use);
 482       fprintf (file, "\n");
 483     }
 484 }
 485
 486 /* Dumps information about induction variable candidate CAND to FILE.  */
 487
 488 extern void dump_cand (FILE *, struct iv_cand *);
 489 void
 490 dump_cand (FILE *file, struct iv_cand *cand)
 491 {
 492   struct iv *iv = cand->iv;
 493
 494   fprintf (file, "candidate %d%s\n",
 495            cand->id, cand->important ? " (important)" : "");
 496
 497   if (cand->depends_on)
 498     {
 499       fprintf (file, "  depends on ");
 500       dump_bitmap (file, cand->depends_on);
 501     }
 502
 503   if (!iv)
 504     {
 505       fprintf (file, "  final value replacement\n");
 506       return;
 507     }
 508
 509   switch (cand->pos)
 510     {
 511     case IP_NORMAL:
 512       fprintf (file, "  incremented before exit test\n");
 513       break;
 514
 515     case IP_END:
 516       fprintf (file, "  incremented at end\n");
 517       break;
 518
 519     case IP_ORIGINAL:
 520       fprintf (file, "  original biv\n");
 521       break;
 522     }
 523
 524   dump_iv (file, iv);
 525 }
 526
 527 /* Returns the info for ssa version VER.  */
 528
 529 static inline struct version_info *
 530 ver_info (struct ivopts_data *data, unsigned ver)
 531 {
 532   return data->version_info + ver;
 533 }
 534
 535 /* Returns the info for ssa name NAME.  */
 536
 537 static inline struct version_info *
 538 name_info (struct ivopts_data *data, tree name)
 539 {
 540   return ver_info (data, SSA_NAME_VERSION (name));
 541 }
 542
 543 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 544    emitted in LOOP.  */
 545
 546 static bool
 547 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 548 {
 549   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 550
 551   gcc_assert (bb);
 552
 553   if (sbb == loop->latch)
 554     return true;
 555
 556   if (sbb != bb)
 557     return false;
 558
 559   return stmt == last_stmt (bb);
 560 }
 561
 562 /* Returns true if STMT if after the place where the original induction
 563    variable CAND is incremented.  */
 564
 565 static bool
 566 stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
 567 {
 568   basic_block cand_bb = gimple_bb (cand->incremented_at);
 569   basic_block stmt_bb = gimple_bb (stmt);
 570   gimple_stmt_iterator bsi;
 571
 572   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 573     return false;
 574
 575   if (stmt_bb != cand_bb)
 576     return true;
 577
 578   /* Scan the block from the end, since the original ivs are usually
 579      incremented at the end of the loop body.  */
 580   for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi))
 581     {
 582       if (gsi_stmt (bsi) == cand->incremented_at)
 583         return false;
 584       if (gsi_stmt (bsi) == stmt)
 585         return true;
 586     }
 587 }
 588
 589 /* Returns true if STMT if after the place where the induction variable
 590    CAND is incremented in LOOP.  */
 591
 592 static bool
 593 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 594 {
 595   switch (cand->pos)
 596     {
 597     case IP_END:
 598       return false;
 599
 600     case IP_NORMAL:
 601       return stmt_after_ip_normal_pos (loop, stmt);
 602
 603     case IP_ORIGINAL:
 604       return stmt_after_ip_original_pos (cand, stmt);
 605
 606     default:
 607       gcc_unreachable ();
 608     }
 609 }
 610
 611 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 612
 613 static bool
 614 abnormal_ssa_name_p (tree exp)
 615 {
 616   if (!exp)
 617     return false;
 618
 619   if (TREE_CODE (exp) != SSA_NAME)
 620     return false;
 621
 622   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 623 }
 624
 625 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 626    abnormal phi node.  Callback for for_each_index.  */
 627
 628 static bool
 629 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 630                                   void *data ATTRIBUTE_UNUSED)
 631 {
 632   if (TREE_CODE (base) == ARRAY_REF)
 633     {
 634       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 635         return false;
 636       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 637         return false;
 638     }
 639
 640   return !abnormal_ssa_name_p (*index);
 641 }
 642
 643 /* Returns true if EXPR contains a ssa name that occurs in an
 644    abnormal phi node.  */
 645
 646 bool
 647 contains_abnormal_ssa_name_p (tree expr)
 648 {
 649   enum tree_code code;
 650   enum tree_code_class codeclass;
 651
 652   if (!expr)
 653     return false;
 654
 655   code = TREE_CODE (expr);
 656   codeclass = TREE_CODE_CLASS (code);
 657
 658   if (code == SSA_NAME)
 659     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 660
 661   if (code == INTEGER_CST
 662       || is_gimple_min_invariant (expr))
 663     return false;
 664
 665   if (code == ADDR_EXPR)
 666     return !for_each_index (&TREE_OPERAND (expr, 0),
 667                             idx_contains_abnormal_ssa_name_p,
 668                             NULL);
 669
 670   switch (codeclass)
 671     {
 672     case tcc_binary:
 673     case tcc_comparison:
 674       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 675         return true;
 676
 677       /* Fallthru.  */
 678     case tcc_unary:
 679       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 680         return true;
 681
 682       break;
 683
 684     default:
 685       gcc_unreachable ();
 686     }
 687
 688   return false;
 689 }
 690
 691 /*  Returns tree describing number of iterations determined from
 692     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 693
 694 static tree
 695 niter_for_exit (struct ivopts_data *data, edge exit)
 696 {
 697   struct tree_niter_desc desc;
 698   tree niter;
 699   void **slot;
 700
 701   if (!data->niters)
 702     {
 703       data->niters = pointer_map_create ();
 704       slot = NULL;
 705     }
 706   else
 707     slot = pointer_map_contains (data->niters, exit);
 708
 709   if (!slot)
 710     {
 711       /* Try to determine number of iterations.  We must know it
 712          unconditionally (i.e., without possibility of # of iterations
 713          being zero).  Also, we cannot safely work with ssa names that
 714          appear in phi nodes on abnormal edges, so that we do not create
 715          overlapping life ranges for them (PR 27283).  */
 716       if (number_of_iterations_exit (data->current_loop,
 717                                      exit, &desc, true)
 718           && integer_zerop (desc.may_be_zero)
 719           && !contains_abnormal_ssa_name_p (desc.niter))
 720         niter = desc.niter;
 721       else
 722         niter = NULL_TREE;
 723
 724       *pointer_map_insert (data->niters, exit) = niter;
 725     }
 726   else
 727     niter = (tree) *slot;
 728
 729   return niter;
 730 }
 731
 732 /* Returns tree describing number of iterations determined from
 733    single dominating exit of DATA->current_loop, or NULL if something
 734    goes wrong.  */
 735
 736 static tree
 737 niter_for_single_dom_exit (struct ivopts_data *data)
 738 {
 739   edge exit = single_dom_exit (data->current_loop);
 740
 741   if (!exit)
 742     return NULL;
 743
 744   return niter_for_exit (data, exit);
 745 }
 746
 747 /* Initializes data structures used by the iv optimization pass, stored
 748    in DATA.  */
 749
 750 static void
 751 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 752 {
 753   data->version_info_size = 2 * num_ssa_names;
 754   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 755   data->relevant = BITMAP_ALLOC (NULL);
 756   data->important_candidates = BITMAP_ALLOC (NULL);
 757   data->max_inv_id = 0;
 758   data->niters = NULL;
 759   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 760   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 761   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 762 }
 763
 764 /* Returns a memory object to that EXPR points.  In case we are able to
 765    determine that it does not point to any such object, NULL is returned.  */
 766
 767 static tree
 768 determine_base_object (tree expr)
 769 {
 770   enum tree_code code = TREE_CODE (expr);
 771   tree base, obj;
 772
 773   /* If this is a pointer casted to any type, we need to determine
 774      the base object for the pointer; so handle conversions before
 775      throwing away non-pointer expressions.  */
 776   if (CONVERT_EXPR_P (expr))
 777     return determine_base_object (TREE_OPERAND (expr, 0));
 778
 779   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 780     return NULL_TREE;
 781
 782   switch (code)
 783     {
 784     case INTEGER_CST:
 785       return NULL_TREE;
 786
 787     case ADDR_EXPR:
 788       obj = TREE_OPERAND (expr, 0);
 789       base = get_base_address (obj);
 790
 791       if (!base)
 792         return expr;
 793
 794       if (TREE_CODE (base) == INDIRECT_REF)
 795         return determine_base_object (TREE_OPERAND (base, 0));
 796
 797       return fold_convert (ptr_type_node,
 798                            build_fold_addr_expr (base));
 799
 800     case POINTER_PLUS_EXPR:
 801       return determine_base_object (TREE_OPERAND (expr, 0));
 802
 803     case PLUS_EXPR:
 804     case MINUS_EXPR:
 805       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 806       gcc_unreachable ();
 807
 808     default:
 809       return fold_convert (ptr_type_node, expr);
 810     }
 811 }
 812
 813 /* Allocates an induction variable with given initial value BASE and step STEP
 814    for loop LOOP.  */
 815
 816 static struct iv *
 817 alloc_iv (tree base, tree step)
 818 {
 819   struct iv *iv = XCNEW (struct iv);
 820   gcc_assert (step != NULL_TREE);
 821
 822   iv->base = base;
 823   iv->base_object = determine_base_object (base);
 824   iv->step = step;
 825   iv->biv_p = false;
 826   iv->have_use_for = false;
 827   iv->use_id = 0;
 828   iv->ssa_name = NULL_TREE;
 829
 830   return iv;
 831 }
 832
 833 /* Sets STEP and BASE for induction variable IV.  */
 834
 835 static void
 836 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 837 {
 838   struct version_info *info = name_info (data, iv);
 839
 840   gcc_assert (!info->iv);
 841
 842   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 843   info->iv = alloc_iv (base, step);
 844   info->iv->ssa_name = iv;
 845 }
 846
 847 /* Finds induction variable declaration for VAR.  */
 848
 849 static struct iv *
 850 get_iv (struct ivopts_data *data, tree var)
 851 {
 852   basic_block bb;
 853   tree type = TREE_TYPE (var);
 854
 855   if (!POINTER_TYPE_P (type)
 856       && !INTEGRAL_TYPE_P (type))
 857     return NULL;
 858
 859   if (!name_info (data, var)->iv)
 860     {
 861       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 862
 863       if (!bb
 864           || !flow_bb_inside_loop_p (data->current_loop, bb))
 865         set_iv (data, var, var, build_int_cst (type, 0));
 866     }
 867
 868   return name_info (data, var)->iv;
 869 }
 870
 871 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 872    not define a simple affine biv with nonzero step.  */
 873
 874 static tree
 875 determine_biv_step (gimple phi)
 876 {
 877   struct loop *loop = gimple_bb (phi)->loop_father;
 878   tree name = PHI_RESULT (phi);
 879   affine_iv iv;
 880
 881   if (!is_gimple_reg (name))
 882     return NULL_TREE;
 883
 884   if (!simple_iv (loop, phi, name, &iv, true))
 885     return NULL_TREE;
 886
 887   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 888 }
 889
 890 /* Finds basic ivs.  */
 891
 892 static bool
 893 find_bivs (struct ivopts_data *data)
 894 {
 895   gimple phi;
 896   tree step, type, base;
 897   bool found = false;
 898   struct loop *loop = data->current_loop;
 899   gimple_stmt_iterator psi;
 900
 901   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
 902     {
 903       phi = gsi_stmt (psi);
 904
 905       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 906         continue;
 907
 908       step = determine_biv_step (phi);
 909       if (!step)
 910         continue;
 911
 912       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 913       base = expand_simple_operations (base);
 914       if (contains_abnormal_ssa_name_p (base)
 915           || contains_abnormal_ssa_name_p (step))
 916         continue;
 917
 918       type = TREE_TYPE (PHI_RESULT (phi));
 919       base = fold_convert (type, base);
 920       if (step)
 921         {
 922           if (POINTER_TYPE_P (type))
 923             step = fold_convert (sizetype, step);
 924           else
 925             step = fold_convert (type, step);
 926         }
 927
 928       set_iv (data, PHI_RESULT (phi), base, step);
 929       found = true;
 930     }
 931
 932   return found;
 933 }
 934
 935 /* Marks basic ivs.  */
 936
 937 static void
 938 mark_bivs (struct ivopts_data *data)
 939 {
 940   gimple phi;
 941   tree var;
 942   struct iv *iv, *incr_iv;
 943   struct loop *loop = data->current_loop;
 944   basic_block incr_bb;
 945   gimple_stmt_iterator psi;
 946
 947   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
 948     {
 949       phi = gsi_stmt (psi);
 950
 951       iv = get_iv (data, PHI_RESULT (phi));
 952       if (!iv)
 953         continue;
 954
 955       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 956       incr_iv = get_iv (data, var);
 957       if (!incr_iv)
 958         continue;
 959
 960       /* If the increment is in the subloop, ignore it.  */
 961       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 962       if (incr_bb->loop_father != data->current_loop
 963           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 964         continue;
 965
 966       iv->biv_p = true;
 967       incr_iv->biv_p = true;
 968     }
 969 }
 970
 971 /* Checks whether STMT defines a linear induction variable and stores its
 972    parameters to IV.  */
 973
 974 static bool
 975 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
 976 {
 977   tree lhs;
 978   struct loop *loop = data->current_loop;
 979
 980   iv->base = NULL_TREE;
 981   iv->step = NULL_TREE;
 982
 983   if (gimple_code (stmt) != GIMPLE_ASSIGN)
 984     return false;
 985
 986   lhs = gimple_assign_lhs (stmt);
 987   if (TREE_CODE (lhs) != SSA_NAME)
 988     return false;
 989
 990   if (!simple_iv (loop, stmt, lhs, iv, true))
 991     return false;
 992   iv->base = expand_simple_operations (iv->base);
 993
 994   if (contains_abnormal_ssa_name_p (iv->base)
 995       || contains_abnormal_ssa_name_p (iv->step))
 996     return false;
 997
 998   return true;
 999 }
1000
1001 /* Finds general ivs in statement STMT.  */
1002
1003 static void
1004 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1005 {
1006   affine_iv iv;
1007
1008   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1009     return;
1010
1011   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1012 }
1013
1014 /* Finds general ivs in basic block BB.  */
1015
1016 static void
1017 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1018 {
1019   gimple_stmt_iterator bsi;
1020
1021   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1022     find_givs_in_stmt (data, gsi_stmt (bsi));
1023 }
1024
1025 /* Finds general ivs.  */
1026
1027 static void
1028 find_givs (struct ivopts_data *data)
1029 {
1030   struct loop *loop = data->current_loop;
1031   basic_block *body = get_loop_body_in_dom_order (loop);
1032   unsigned i;
1033
1034   for (i = 0; i < loop->num_nodes; i++)
1035     find_givs_in_bb (data, body[i]);
1036   free (body);
1037 }
1038
1039 /* For each ssa name defined in LOOP determines whether it is an induction
1040    variable and if so, its initial value and step.  */
1041
1042 static bool
1043 find_induction_variables (struct ivopts_data *data)
1044 {
1045   unsigned i;
1046   bitmap_iterator bi;
1047
1048   if (!find_bivs (data))
1049     return false;
1050
1051   find_givs (data);
1052   mark_bivs (data);
1053
1054   if (dump_file && (dump_flags & TDF_DETAILS))
1055     {
1056       tree niter = niter_for_single_dom_exit (data);
1057
1058       if (niter)
1059         {
1060           fprintf (dump_file, "  number of iterations ");
1061           print_generic_expr (dump_file, niter, TDF_SLIM);
1062           fprintf (dump_file, "\n\n");
1063         };
1064
1065       fprintf (dump_file, "Induction variables:\n\n");
1066
1067       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1068         {
1069           if (ver_info (data, i)->iv)
1070             dump_iv (dump_file, ver_info (data, i)->iv);
1071         }
1072     }
1073
1074   return true;
1075 }
1076
1077 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1078
1079 static struct iv_use *
1080 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1081             gimple stmt, enum use_type use_type)
1082 {
1083   struct iv_use *use = XCNEW (struct iv_use);
1084
1085   use->id = n_iv_uses (data);
1086   use->type = use_type;
1087   use->iv = iv;
1088   use->stmt = stmt;
1089   use->op_p = use_p;
1090   use->related_cands = BITMAP_ALLOC (NULL);
1091
1092   /* To avoid showing ssa name in the dumps, if it was not reset by the
1093      caller.  */
1094   iv->ssa_name = NULL_TREE;
1095
1096   if (dump_file && (dump_flags & TDF_DETAILS))
1097     dump_use (dump_file, use);
1098
1099   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1100
1101   return use;
1102 }
1103
1104 /* Checks whether OP is a loop-level invariant and if so, records it.
1105    NONLINEAR_USE is true if the invariant is used in a way we do not
1106    handle specially.  */
1107
1108 static void
1109 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1110 {
1111   basic_block bb;
1112   struct version_info *info;
1113
1114   if (TREE_CODE (op) != SSA_NAME
1115       || !is_gimple_reg (op))
1116     return;
1117
1118   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1119   if (bb
1120       && flow_bb_inside_loop_p (data->current_loop, bb))
1121     return;
1122
1123   info = name_info (data, op);
1124   info->name = op;
1125   info->has_nonlin_use |= nonlinear_use;
1126   if (!info->inv_id)
1127     info->inv_id = ++data->max_inv_id;
1128   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1129 }
1130
1131 /* Checks whether the use OP is interesting and if so, records it.  */
1132
1133 static struct iv_use *
1134 find_interesting_uses_op (struct ivopts_data *data, tree op)
1135 {
1136   struct iv *iv;
1137   struct iv *civ;
1138   gimple stmt;
1139   struct iv_use *use;
1140
1141   if (TREE_CODE (op) != SSA_NAME)
1142     return NULL;
1143
1144   iv = get_iv (data, op);
1145   if (!iv)
1146     return NULL;
1147
1148   if (iv->have_use_for)
1149     {
1150       use = iv_use (data, iv->use_id);
1151
1152       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1153       return use;
1154     }
1155
1156   if (integer_zerop (iv->step))
1157     {
1158       record_invariant (data, op, true);
1159       return NULL;
1160     }
1161   iv->have_use_for = true;
1162
1163   civ = XNEW (struct iv);
1164   *civ = *iv;
1165
1166   stmt = SSA_NAME_DEF_STMT (op);
1167   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1168               || is_gimple_assign (stmt));
1169
1170   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1171   iv->use_id = use->id;
1172
1173   return use;
1174 }
1175
1176 /* Given a condition in statement STMT, checks whether it is a compare
1177    of an induction variable and an invariant.  If this is the case,
1178    CONTROL_VAR is set to location of the iv, BOUND to the location of
1179    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1180    induction variable descriptions, and true is returned.  If this is not
1181    the case, CONTROL_VAR and BOUND are set to the arguments of the
1182    condition and false is returned.  */
1183
1184 static bool
1185 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1186                        tree **control_var, tree **bound,
1187                        struct iv **iv_var, struct iv **iv_bound)
1188 {
1189   /* The objects returned when COND has constant operands.  */
1190   static struct iv const_iv;
1191   static tree zero;
1192   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1193   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1194   bool ret = false;
1195
1196   if (gimple_code (stmt) == GIMPLE_COND)
1197     {
1198       op0 = gimple_cond_lhs_ptr (stmt);
1199       op1 = gimple_cond_rhs_ptr (stmt);
1200     }
1201   else
1202     {
1203       op0 = gimple_assign_rhs1_ptr (stmt);
1204       op1 = gimple_assign_rhs2_ptr (stmt);
1205     }
1206
1207   zero = integer_zero_node;
1208   const_iv.step = integer_zero_node;
1209
1210   if (TREE_CODE (*op0) == SSA_NAME)
1211     iv0 = get_iv (data, *op0);
1212   if (TREE_CODE (*op1) == SSA_NAME)
1213     iv1 = get_iv (data, *op1);
1214
1215   /* Exactly one of the compared values must be an iv, and the other one must
1216      be an invariant.  */
1217   if (!iv0 || !iv1)
1218     goto end;
1219
1220   if (integer_zerop (iv0->step))
1221     {
1222       /* Control variable may be on the other side.  */
1223       tmp_op = op0; op0 = op1; op1 = tmp_op;
1224       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1225     }
1226   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1227
1228 end:
1229   if (control_var)
1230     *control_var = op0;;
1231   if (iv_var)
1232     *iv_var = iv0;;
1233   if (bound)
1234     *bound = op1;
1235   if (iv_bound)
1236     *iv_bound = iv1;
1237
1238   return ret;
1239 }
1240
1241 /* Checks whether the condition in STMT is interesting and if so,
1242    records it.  */
1243
1244 static void
1245 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1246 {
1247   tree *var_p, *bound_p;
1248   struct iv *var_iv, *civ;
1249
1250   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1251     {
1252       find_interesting_uses_op (data, *var_p);
1253       find_interesting_uses_op (data, *bound_p);
1254       return;
1255     }
1256
1257   civ = XNEW (struct iv);
1258   *civ = *var_iv;
1259   record_use (data, NULL, civ, stmt, USE_COMPARE);
1260 }
1261
1262 /* Returns true if expression EXPR is obviously invariant in LOOP,
1263    i.e. if all its operands are defined outside of the LOOP.  LOOP
1264    should not be the function body.  */
1265
1266 bool
1267 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1268 {
1269   basic_block def_bb;
1270   unsigned i, len;
1271
1272   gcc_assert (loop_depth (loop) > 0);
1273
1274   if (is_gimple_min_invariant (expr))
1275     return true;
1276
1277   if (TREE_CODE (expr) == SSA_NAME)
1278     {
1279       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1280       if (def_bb
1281           && flow_bb_inside_loop_p (loop, def_bb))
1282         return false;
1283
1284       return true;
1285     }
1286
1287   if (!EXPR_P (expr))
1288     return false;
1289
1290   len = TREE_OPERAND_LENGTH (expr);
1291   for (i = 0; i < len; i++)
1292     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1293       return false;
1294
1295   return true;
1296 }
1297
1298 /* Returns true if statement STMT is obviously invariant in LOOP,
1299    i.e. if all its operands on the RHS are defined outside of the LOOP.
1300    LOOP should not be the function body.  */
1301
1302 bool
1303 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1304 {
1305   unsigned i;
1306   tree lhs;
1307
1308   gcc_assert (loop_depth (loop) > 0);
1309
1310   lhs = gimple_get_lhs (stmt);
1311   for (i = 0; i < gimple_num_ops (stmt); i++)
1312     {
1313       tree op = gimple_op (stmt, i);
1314       if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1315         return false;
1316     }
1317
1318   return true;
1319 }
1320
1321 /* Cumulates the steps of indices into DATA and replaces their values with the
1322    initial ones.  Returns false when the value of the index cannot be determined.
1323    Callback for for_each_index.  */
1324
1325 struct ifs_ivopts_data
1326 {
1327   struct ivopts_data *ivopts_data;
1328   gimple stmt;
1329   tree step;
1330 };
1331
1332 static bool
1333 idx_find_step (tree base, tree *idx, void *data)
1334 {
1335   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1336   struct iv *iv;
1337   tree step, iv_base, iv_step, lbound, off;
1338   struct loop *loop = dta->ivopts_data->current_loop;
1339
1340   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1341       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1342     return false;
1343
1344   /* If base is a component ref, require that the offset of the reference
1345      be invariant.  */
1346   if (TREE_CODE (base) == COMPONENT_REF)
1347     {
1348       off = component_ref_field_offset (base);
1349       return expr_invariant_in_loop_p (loop, off);
1350     }
1351
1352   /* If base is array, first check whether we will be able to move the
1353      reference out of the loop (in order to take its address in strength
1354      reduction).  In order for this to work we need both lower bound
1355      and step to be loop invariants.  */
1356   if (TREE_CODE (base) == ARRAY_REF)
1357     {
1358       step = array_ref_element_size (base);
1359       lbound = array_ref_low_bound (base);
1360
1361       if (!expr_invariant_in_loop_p (loop, step)
1362           || !expr_invariant_in_loop_p (loop, lbound))
1363         return false;
1364     }
1365
1366   if (TREE_CODE (*idx) != SSA_NAME)
1367     return true;
1368
1369   iv = get_iv (dta->ivopts_data, *idx);
1370   if (!iv)
1371     return false;
1372
1373   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1374           *&x[0], which is not folded and does not trigger the
1375           ARRAY_REF path below.  */
1376   *idx = iv->base;
1377
1378   if (integer_zerop (iv->step))
1379     return true;
1380
1381   if (TREE_CODE (base) == ARRAY_REF)
1382     {
1383       step = array_ref_element_size (base);
1384
1385       /* We only handle addresses whose step is an integer constant.  */
1386       if (TREE_CODE (step) != INTEGER_CST)
1387         return false;
1388     }
1389   else
1390     /* The step for pointer arithmetics already is 1 byte.  */
1391     step = build_int_cst (sizetype, 1);
1392
1393   iv_base = iv->base;
1394   iv_step = iv->step;
1395   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1396                             sizetype, &iv_base, &iv_step, dta->stmt,
1397                             false))
1398     {
1399       /* The index might wrap.  */
1400       return false;
1401     }
1402
1403   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1404   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1405
1406   return true;
1407 }
1408
1409 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1410    object is passed to it in DATA.  */
1411
1412 static bool
1413 idx_record_use (tree base, tree *idx,
1414                 void *vdata)
1415 {
1416   struct ivopts_data *data = (struct ivopts_data *) vdata;
1417   find_interesting_uses_op (data, *idx);
1418   if (TREE_CODE (base) == ARRAY_REF)
1419     {
1420       find_interesting_uses_op (data, array_ref_element_size (base));
1421       find_interesting_uses_op (data, array_ref_low_bound (base));
1422     }
1423   return true;
1424 }
1425
1426 /* If we can prove that TOP = cst * BOT for some constant cst,
1427    store cst to MUL and return true.  Otherwise return false.
1428    The returned value is always sign-extended, regardless of the
1429    signedness of TOP and BOT.  */
1430
1431 static bool
1432 constant_multiple_of (tree top, tree bot, double_int *mul)
1433 {
1434   tree mby;
1435   enum tree_code code;
1436   double_int res, p0, p1;
1437   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1438
1439   STRIP_NOPS (top);
1440   STRIP_NOPS (bot);
1441
1442   if (operand_equal_p (top, bot, 0))
1443     {
1444       *mul = double_int_one;
1445       return true;
1446     }
1447
1448   code = TREE_CODE (top);
1449   switch (code)
1450     {
1451     case MULT_EXPR:
1452       mby = TREE_OPERAND (top, 1);
1453       if (TREE_CODE (mby) != INTEGER_CST)
1454         return false;
1455
1456       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1457         return false;
1458
1459       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1460                               precision);
1461       return true;
1462
1463     case PLUS_EXPR:
1464     case MINUS_EXPR:
1465       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1466           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1467         return false;
1468
1469       if (code == MINUS_EXPR)
1470         p1 = double_int_neg (p1);
1471       *mul = double_int_sext (double_int_add (p0, p1), precision);
1472       return true;
1473
1474     case INTEGER_CST:
1475       if (TREE_CODE (bot) != INTEGER_CST)
1476         return false;
1477
1478       p0 = double_int_sext (tree_to_double_int (top), precision);
1479       p1 = double_int_sext (tree_to_double_int (bot), precision);
1480       if (double_int_zero_p (p1))
1481         return false;
1482       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1483                               precision);
1484       return double_int_zero_p (res);
1485
1486     default:
1487       return false;
1488     }
1489 }
1490
1491 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1492
1493 static bool
1494 may_be_unaligned_p (tree ref, tree step)
1495 {
1496   tree base;
1497   tree base_type;
1498   HOST_WIDE_INT bitsize;
1499   HOST_WIDE_INT bitpos;
1500   tree toffset;
1501   enum machine_mode mode;
1502   int unsignedp, volatilep;
1503   unsigned base_align;
1504
1505   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1506      thus they are not misaligned.  */
1507   if (TREE_CODE (ref) == TARGET_MEM_REF)
1508     return false;
1509
1510   /* The test below is basically copy of what expr.c:normal_inner_ref
1511      does to check whether the object must be loaded by parts when
1512      STRICT_ALIGNMENT is true.  */
1513   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1514                               &unsignedp, &volatilep, true);
1515   base_type = TREE_TYPE (base);
1516   base_align = TYPE_ALIGN (base_type);
1517
1518   if (mode != BLKmode)
1519     {
1520       double_int mul;
1521       tree al = build_int_cst (TREE_TYPE (step),
1522                                GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1523
1524       if (base_align < GET_MODE_ALIGNMENT (mode)
1525           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1526           || bitpos % BITS_PER_UNIT != 0)
1527         return true;
1528
1529       if (!constant_multiple_of (step, al, &mul))
1530         return true;
1531     }
1532
1533   return false;
1534 }
1535
1536 /* Return true if EXPR may be non-addressable.   */
1537
1538 static bool
1539 may_be_nonaddressable_p (tree expr)
1540 {
1541   switch (TREE_CODE (expr))
1542     {
1543     case TARGET_MEM_REF:
1544       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1545          target, thus they are always addressable.  */
1546       return false;
1547
1548     case COMPONENT_REF:
1549       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1550              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1551
1552     case VIEW_CONVERT_EXPR:
1553       /* This kind of view-conversions may wrap non-addressable objects
1554          and make them look addressable.  After some processing the
1555          non-addressability may be uncovered again, causing ADDR_EXPRs
1556          of inappropriate objects to be built.  */
1557       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1558           || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
1559         return true;
1560
1561       /* ... fall through ... */
1562
1563     case ARRAY_REF:
1564     case ARRAY_RANGE_REF:
1565       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1566
1567     CASE_CONVERT:
1568       return true;
1569
1570     default:
1571       break;
1572     }
1573
1574   return false;
1575 }
1576
1577 /* Finds addresses in *OP_P inside STMT.  */
1578
1579 static void
1580 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1581 {
1582   tree base = *op_p, step = build_int_cst (sizetype, 0);
1583   struct iv *civ;
1584   struct ifs_ivopts_data ifs_ivopts_data;
1585
1586   /* Do not play with volatile memory references.  A bit too conservative,
1587      perhaps, but safe.  */
1588   if (gimple_has_volatile_ops (stmt))
1589     goto fail;
1590
1591   /* Ignore bitfields for now.  Not really something terribly complicated
1592      to handle.  TODO.  */
1593   if (TREE_CODE (base) == BIT_FIELD_REF)
1594     goto fail;
1595
1596   base = unshare_expr (base);
1597
1598   if (TREE_CODE (base) == TARGET_MEM_REF)
1599     {
1600       tree type = build_pointer_type (TREE_TYPE (base));
1601       tree astep;
1602
1603       if (TMR_BASE (base)
1604           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1605         {
1606           civ = get_iv (data, TMR_BASE (base));
1607           if (!civ)
1608             goto fail;
1609
1610           TMR_BASE (base) = civ->base;
1611           step = civ->step;
1612         }
1613       if (TMR_INDEX (base)
1614           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1615         {
1616           civ = get_iv (data, TMR_INDEX (base));
1617           if (!civ)
1618             goto fail;
1619
1620           TMR_INDEX (base) = civ->base;
1621           astep = civ->step;
1622
1623           if (astep)
1624             {
1625               if (TMR_STEP (base))
1626                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1627
1628               step = fold_build2 (PLUS_EXPR, type, step, astep);
1629             }
1630         }
1631
1632       if (integer_zerop (step))
1633         goto fail;
1634       base = tree_mem_ref_addr (type, base);
1635     }
1636   else
1637     {
1638       ifs_ivopts_data.ivopts_data = data;
1639       ifs_ivopts_data.stmt = stmt;
1640       ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1641       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1642           || integer_zerop (ifs_ivopts_data.step))
1643         goto fail;
1644       step = ifs_ivopts_data.step;
1645
1646       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1647       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1648
1649       /* Check that the base expression is addressable.  This needs
1650          to be done after substituting bases of IVs into it.  */
1651       if (may_be_nonaddressable_p (base))
1652         goto fail;
1653
1654       /* Moreover, on strict alignment platforms, check that it is
1655          sufficiently aligned.  */
1656       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1657         goto fail;
1658
1659       base = build_fold_addr_expr (base);
1660
1661       /* Substituting bases of IVs into the base expression might
1662          have caused folding opportunities.  */
1663       if (TREE_CODE (base) == ADDR_EXPR)
1664         {
1665           tree *ref = &TREE_OPERAND (base, 0);
1666           while (handled_component_p (*ref))
1667             ref = &TREE_OPERAND (*ref, 0);
1668           if (TREE_CODE (*ref) == INDIRECT_REF)
1669             *ref = fold_indirect_ref (*ref);
1670         }
1671     }
1672
1673   civ = alloc_iv (base, step);
1674   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1675   return;
1676
1677 fail:
1678   for_each_index (op_p, idx_record_use, data);
1679 }
1680
1681 /* Finds and records invariants used in STMT.  */
1682
1683 static void
1684 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1685 {
1686   ssa_op_iter iter;
1687   use_operand_p use_p;
1688   tree op;
1689
1690   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1691     {
1692       op = USE_FROM_PTR (use_p);
1693       record_invariant (data, op, false);
1694     }
1695 }
1696
1697 /* Finds interesting uses of induction variables in the statement STMT.  */
1698
1699 static void
1700 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1701 {
1702   struct iv *iv;
1703   tree op, *lhs, *rhs;
1704   ssa_op_iter iter;
1705   use_operand_p use_p;
1706   enum tree_code code;
1707
1708   find_invariants_stmt (data, stmt);
1709
1710   if (gimple_code (stmt) == GIMPLE_COND)
1711     {
1712       find_interesting_uses_cond (data, stmt);
1713       return;
1714     }
1715
1716   if (is_gimple_assign (stmt))
1717     {
1718       lhs = gimple_assign_lhs_ptr (stmt);
1719       rhs = gimple_assign_rhs1_ptr (stmt);
1720
1721       if (TREE_CODE (*lhs) == SSA_NAME)
1722         {
1723           /* If the statement defines an induction variable, the uses are not
1724              interesting by themselves.  */
1725
1726           iv = get_iv (data, *lhs);
1727
1728           if (iv && !integer_zerop (iv->step))
1729             return;
1730         }
1731
1732       code = gimple_assign_rhs_code (stmt);
1733       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1734           && (REFERENCE_CLASS_P (*rhs)
1735               || is_gimple_val (*rhs)))
1736         {
1737           if (REFERENCE_CLASS_P (*rhs))
1738             find_interesting_uses_address (data, stmt, rhs);
1739           else
1740             find_interesting_uses_op (data, *rhs);
1741
1742           if (REFERENCE_CLASS_P (*lhs))
1743             find_interesting_uses_address (data, stmt, lhs);
1744           return;
1745         }
1746       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1747         {
1748           find_interesting_uses_cond (data, stmt);
1749           return;
1750         }
1751
1752       /* TODO -- we should also handle address uses of type
1753
1754          memory = call (whatever);
1755
1756          and
1757
1758          call (memory).  */
1759     }
1760
1761   if (gimple_code (stmt) == GIMPLE_PHI
1762       && gimple_bb (stmt) == data->current_loop->header)
1763     {
1764       iv = get_iv (data, PHI_RESULT (stmt));
1765
1766       if (iv && !integer_zerop (iv->step))
1767         return;
1768     }
1769
1770   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1771     {
1772       op = USE_FROM_PTR (use_p);
1773
1774       if (TREE_CODE (op) != SSA_NAME)
1775         continue;
1776
1777       iv = get_iv (data, op);
1778       if (!iv)
1779         continue;
1780
1781       find_interesting_uses_op (data, op);
1782     }
1783 }
1784
1785 /* Finds interesting uses of induction variables outside of loops
1786    on loop exit edge EXIT.  */
1787
1788 static void
1789 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1790 {
1791   gimple phi;
1792   gimple_stmt_iterator psi;
1793   tree def;
1794
1795   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1796     {
1797       phi = gsi_stmt (psi);
1798       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1799       if (is_gimple_reg (def))
1800         find_interesting_uses_op (data, def);
1801     }
1802 }
1803
1804 /* Finds uses of the induction variables that are interesting.  */
1805
1806 static void
1807 find_interesting_uses (struct ivopts_data *data)
1808 {
1809   basic_block bb;
1810   gimple_stmt_iterator bsi;
1811   basic_block *body = get_loop_body (data->current_loop);
1812   unsigned i;
1813   struct version_info *info;
1814   edge e;
1815
1816   if (dump_file && (dump_flags & TDF_DETAILS))
1817     fprintf (dump_file, "Uses:\n\n");
1818
1819   for (i = 0; i < data->current_loop->num_nodes; i++)
1820     {
1821       edge_iterator ei;
1822       bb = body[i];
1823
1824       FOR_EACH_EDGE (e, ei, bb->succs)
1825         if (e->dest != EXIT_BLOCK_PTR
1826             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1827           find_interesting_uses_outside (data, e);
1828
1829       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1830         find_interesting_uses_stmt (data, gsi_stmt (bsi));
1831       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1832         find_interesting_uses_stmt (data, gsi_stmt (bsi));
1833     }
1834
1835   if (dump_file && (dump_flags & TDF_DETAILS))
1836     {
1837       bitmap_iterator bi;
1838
1839       fprintf (dump_file, "\n");
1840
1841       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1842         {
1843           info = ver_info (data, i);
1844           if (info->inv_id)
1845             {
1846               fprintf (dump_file, "  ");
1847               print_generic_expr (dump_file, info->name, TDF_SLIM);
1848               fprintf (dump_file, " is invariant (%d)%s\n",
1849                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1850             }
1851         }
1852
1853       fprintf (dump_file, "\n");
1854     }
1855
1856   free (body);
1857 }
1858
1859 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1860    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1861    we are at the top-level of the processed address.  */
1862
1863 static tree
1864 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1865                 unsigned HOST_WIDE_INT *offset)
1866 {
1867   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1868   enum tree_code code;
1869   tree type, orig_type = TREE_TYPE (expr);
1870   unsigned HOST_WIDE_INT off0, off1, st;
1871   tree orig_expr = expr;
1872
1873   STRIP_NOPS (expr);
1874
1875   type = TREE_TYPE (expr);
1876   code = TREE_CODE (expr);
1877   *offset = 0;
1878
1879   switch (code)
1880     {
1881     case INTEGER_CST:
1882       if (!cst_and_fits_in_hwi (expr)
1883           || integer_zerop (expr))
1884         return orig_expr;
1885
1886       *offset = int_cst_value (expr);
1887       return build_int_cst (orig_type, 0);
1888
1889     case POINTER_PLUS_EXPR:
1890     case PLUS_EXPR:
1891     case MINUS_EXPR:
1892       op0 = TREE_OPERAND (expr, 0);
1893       op1 = TREE_OPERAND (expr, 1);
1894
1895       op0 = strip_offset_1 (op0, false, false, &off0);
1896       op1 = strip_offset_1 (op1, false, false, &off1);
1897
1898       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1899       if (op0 == TREE_OPERAND (expr, 0)
1900           && op1 == TREE_OPERAND (expr, 1))
1901         return orig_expr;
1902
1903       if (integer_zerop (op1))
1904         expr = op0;
1905       else if (integer_zerop (op0))
1906         {
1907           if (code == MINUS_EXPR)
1908             expr = fold_build1 (NEGATE_EXPR, type, op1);
1909           else
1910             expr = op1;
1911         }
1912       else
1913         expr = fold_build2 (code, type, op0, op1);
1914
1915       return fold_convert (orig_type, expr);
1916
1917     case ARRAY_REF:
1918       if (!inside_addr)
1919         return orig_expr;
1920
1921       step = array_ref_element_size (expr);
1922       if (!cst_and_fits_in_hwi (step))
1923         break;
1924
1925       st = int_cst_value (step);
1926       op1 = TREE_OPERAND (expr, 1);
1927       op1 = strip_offset_1 (op1, false, false, &off1);
1928       *offset = off1 * st;
1929
1930       if (top_compref
1931           && integer_zerop (op1))
1932         {
1933           /* Strip the component reference completely.  */
1934           op0 = TREE_OPERAND (expr, 0);
1935           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1936           *offset += off0;
1937           return op0;
1938         }
1939       break;
1940
1941     case COMPONENT_REF:
1942       if (!inside_addr)
1943         return orig_expr;
1944
1945       tmp = component_ref_field_offset (expr);
1946       if (top_compref
1947           && cst_and_fits_in_hwi (tmp))
1948         {
1949           /* Strip the component reference completely.  */
1950           op0 = TREE_OPERAND (expr, 0);
1951           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1952           *offset = off0 + int_cst_value (tmp);
1953           return op0;
1954         }
1955       break;
1956
1957     case ADDR_EXPR:
1958       op0 = TREE_OPERAND (expr, 0);
1959       op0 = strip_offset_1 (op0, true, true, &off0);
1960       *offset += off0;
1961
1962       if (op0 == TREE_OPERAND (expr, 0))
1963         return orig_expr;
1964
1965       expr = build_fold_addr_expr (op0);
1966       return fold_convert (orig_type, expr);
1967
1968     case INDIRECT_REF:
1969       inside_addr = false;
1970       break;
1971
1972     default:
1973       return orig_expr;
1974     }
1975
1976   /* Default handling of expressions for that we want to recurse into
1977      the first operand.  */
1978   op0 = TREE_OPERAND (expr, 0);
1979   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1980   *offset += off0;
1981
1982   if (op0 == TREE_OPERAND (expr, 0)
1983       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1984     return orig_expr;
1985
1986   expr = copy_node (expr);
1987   TREE_OPERAND (expr, 0) = op0;
1988   if (op1)
1989     TREE_OPERAND (expr, 1) = op1;
1990
1991   /* Inside address, we might strip the top level component references,
1992      thus changing type of the expression.  Handling of ADDR_EXPR
1993      will fix that.  */
1994   expr = fold_convert (orig_type, expr);
1995
1996   return expr;
1997 }
1998
1999 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2000
2001 static tree
2002 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2003 {
2004   return strip_offset_1 (expr, false, false, offset);
2005 }
2006
2007 /* Returns variant of TYPE that can be used as base for different uses.
2008    We return unsigned type with the same precision, which avoids problems
2009    with overflows.  */
2010
2011 static tree
2012 generic_type_for (tree type)
2013 {
2014   if (POINTER_TYPE_P (type))
2015     return unsigned_type_for (type);
2016
2017   if (TYPE_UNSIGNED (type))
2018     return type;
2019
2020   return unsigned_type_for (type);
2021 }
2022
2023 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2024    the bitmap to that we should store it.  */
2025
2026 static struct ivopts_data *fd_ivopts_data;
2027 static tree
2028 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2029 {
2030   bitmap *depends_on = (bitmap *) data;
2031   struct version_info *info;
2032
2033   if (TREE_CODE (*expr_p) != SSA_NAME)
2034     return NULL_TREE;
2035   info = name_info (fd_ivopts_data, *expr_p);
2036
2037   if (!info->inv_id || info->has_nonlin_use)
2038     return NULL_TREE;
2039
2040   if (!*depends_on)
2041     *depends_on = BITMAP_ALLOC (NULL);
2042   bitmap_set_bit (*depends_on, info->inv_id);
2043
2044   return NULL_TREE;
2045 }
2046
2047 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2048    position to POS.  If USE is not NULL, the candidate is set as related to
2049    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2050    replacement of the final value of the iv by a direct computation.  */
2051
2052 static struct iv_cand *
2053 add_candidate_1 (struct ivopts_data *data,
2054                  tree base, tree step, bool important, enum iv_position pos,
2055                  struct iv_use *use, gimple incremented_at)
2056 {
2057   unsigned i;
2058   struct iv_cand *cand = NULL;
2059   tree type, orig_type;
2060
2061   if (base)
2062     {
2063       orig_type = TREE_TYPE (base);
2064       type = generic_type_for (orig_type);
2065       /* Don't convert the base to the generic type for pointers as the generic
2066          type is an integer type with the same size as the pointer type.  */
2067       if (type != orig_type && !POINTER_TYPE_P (orig_type))
2068         {
2069           base = fold_convert (type, base);
2070           step = fold_convert (type, step);
2071         }
2072     }
2073
2074   for (i = 0; i < n_iv_cands (data); i++)
2075     {
2076       cand = iv_cand (data, i);
2077
2078       if (cand->pos != pos)
2079         continue;
2080
2081       if (cand->incremented_at != incremented_at)
2082         continue;
2083
2084       if (!cand->iv)
2085         {
2086           if (!base && !step)
2087             break;
2088
2089           continue;
2090         }
2091
2092       if (!base && !step)
2093         continue;
2094
2095       if (operand_equal_p (base, cand->iv->base, 0)
2096           && operand_equal_p (step, cand->iv->step, 0))
2097         break;
2098     }
2099
2100   if (i == n_iv_cands (data))
2101     {
2102       cand = XCNEW (struct iv_cand);
2103       cand->id = i;
2104
2105       if (!base && !step)
2106         cand->iv = NULL;
2107       else
2108         cand->iv = alloc_iv (base, step);
2109
2110       cand->pos = pos;
2111       if (pos != IP_ORIGINAL && cand->iv)
2112         {
2113           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2114           cand->var_after = cand->var_before;
2115         }
2116       cand->important = important;
2117       cand->incremented_at = incremented_at;
2118       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2119
2120       if (step
2121           && TREE_CODE (step) != INTEGER_CST)
2122         {
2123           fd_ivopts_data = data;
2124           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2125         }
2126
2127       if (dump_file && (dump_flags & TDF_DETAILS))
2128         dump_cand (dump_file, cand);
2129     }
2130
2131   if (important && !cand->important)
2132     {
2133       cand->important = true;
2134       if (dump_file && (dump_flags & TDF_DETAILS))
2135         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2136     }
2137
2138   if (use)
2139     {
2140       bitmap_set_bit (use->related_cands, i);
2141       if (dump_file && (dump_flags & TDF_DETAILS))
2142         fprintf (dump_file, "Candidate %d is related to use %d\n",
2143                  cand->id, use->id);
2144     }
2145
2146   return cand;
2147 }
2148
2149 /* Returns true if incrementing the induction variable at the end of the LOOP
2150    is allowed.
2151
2152    The purpose is to avoid splitting latch edge with a biv increment, thus
2153    creating a jump, possibly confusing other optimization passes and leaving
2154    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2155    is not available (so we do not have a better alternative), or if the latch
2156    edge is already nonempty.  */
2157
2158 static bool
2159 allow_ip_end_pos_p (struct loop *loop)
2160 {
2161   if (!ip_normal_pos (loop))
2162     return true;
2163
2164   if (!empty_block_p (ip_end_pos (loop)))
2165     return true;
2166
2167   return false;
2168 }
2169
2170 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2171    position to POS.  If USE is not NULL, the candidate is set as related to
2172    it.  The candidate computation is scheduled on all available positions.  */
2173
2174 static void
2175 add_candidate (struct ivopts_data *data,
2176                tree base, tree step, bool important, struct iv_use *use)
2177 {
2178   if (ip_normal_pos (data->current_loop))
2179     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2180   if (ip_end_pos (data->current_loop)
2181       && allow_ip_end_pos_p (data->current_loop))
2182     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2183 }
2184
2185 /* Add a standard "0 + 1 * iteration" iv candidate for a
2186    type with SIZE bits.  */
2187
2188 static void
2189 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2190                                      unsigned int size)
2191 {
2192   tree type = lang_hooks.types.type_for_size (size, true);
2193   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2194                  true, NULL);
2195 }
2196
2197 /* Adds standard iv candidates.  */
2198
2199 static void
2200 add_standard_iv_candidates (struct ivopts_data *data)
2201 {
2202   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2203
2204   /* The same for a double-integer type if it is still fast enough.  */
2205   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2206     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2207 }
2208
2209
2210 /* Adds candidates bases on the old induction variable IV.  */
2211
2212 static void
2213 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2214 {
2215   gimple phi;
2216   tree def;
2217   struct iv_cand *cand;
2218
2219   add_candidate (data, iv->base, iv->step, true, NULL);
2220
2221   /* The same, but with initial value zero.  */
2222   add_candidate (data,
2223                  build_int_cst (TREE_TYPE (iv->base), 0),
2224                  iv->step, true, NULL);
2225
2226   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2227   if (gimple_code (phi) == GIMPLE_PHI)
2228     {
2229       /* Additionally record the possibility of leaving the original iv
2230          untouched.  */
2231       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2232       cand = add_candidate_1 (data,
2233                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2234                               SSA_NAME_DEF_STMT (def));
2235       cand->var_before = iv->ssa_name;
2236       cand->var_after = def;
2237     }
2238 }
2239
2240 /* Adds candidates based on the old induction variables.  */
2241
2242 static void
2243 add_old_ivs_candidates (struct ivopts_data *data)
2244 {
2245   unsigned i;
2246   struct iv *iv;
2247   bitmap_iterator bi;
2248
2249   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2250     {
2251       iv = ver_info (data, i)->iv;
2252       if (iv && iv->biv_p && !integer_zerop (iv->step))
2253         add_old_iv_candidates (data, iv);
2254     }
2255 }
2256
2257 /* Adds candidates based on the value of the induction variable IV and USE.  */
2258
2259 static void
2260 add_iv_value_candidates (struct ivopts_data *data,
2261                          struct iv *iv, struct iv_use *use)
2262 {
2263   unsigned HOST_WIDE_INT offset;
2264   tree base;
2265   tree basetype;
2266
2267   add_candidate (data, iv->base, iv->step, false, use);
2268
2269   /* The same, but with initial value zero.  Make such variable important,
2270      since it is generic enough so that possibly many uses may be based
2271      on it.  */
2272   basetype = TREE_TYPE (iv->base);
2273   if (POINTER_TYPE_P (basetype))
2274     basetype = sizetype;
2275   add_candidate (data, build_int_cst (basetype, 0),
2276                  iv->step, true, use);
2277
2278   /* Third, try removing the constant offset.  */
2279   base = strip_offset (iv->base, &offset);
2280   if (offset)
2281     add_candidate (data, base, iv->step, false, use);
2282 }
2283
2284 /* Adds candidates based on the uses.  */
2285
2286 static void
2287 add_derived_ivs_candidates (struct ivopts_data *data)
2288 {
2289   unsigned i;
2290
2291   for (i = 0; i < n_iv_uses (data); i++)
2292     {
2293       struct iv_use *use = iv_use (data, i);
2294
2295       if (!use)
2296         continue;
2297
2298       switch (use->type)
2299         {
2300         case USE_NONLINEAR_EXPR:
2301         case USE_COMPARE:
2302         case USE_ADDRESS:
2303           /* Just add the ivs based on the value of the iv used here.  */
2304           add_iv_value_candidates (data, use->iv, use);
2305           break;
2306
2307         default:
2308           gcc_unreachable ();
2309         }
2310     }
2311 }
2312
2313 /* Record important candidates and add them to related_cands bitmaps
2314    if needed.  */
2315
2316 static void
2317 record_important_candidates (struct ivopts_data *data)
2318 {
2319   unsigned i;
2320   struct iv_use *use;
2321
2322   for (i = 0; i < n_iv_cands (data); i++)
2323     {
2324       struct iv_cand *cand = iv_cand (data, i);
2325
2326       if (cand->important)
2327         bitmap_set_bit (data->important_candidates, i);
2328     }
2329
2330   data->consider_all_candidates = (n_iv_cands (data)
2331                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2332
2333   if (data->consider_all_candidates)
2334     {
2335       /* We will not need "related_cands" bitmaps in this case,
2336          so release them to decrease peak memory consumption.  */
2337       for (i = 0; i < n_iv_uses (data); i++)
2338         {
2339           use = iv_use (data, i);
2340           BITMAP_FREE (use->related_cands);
2341         }
2342     }
2343   else
2344     {
2345       /* Add important candidates to the related_cands bitmaps.  */
2346       for (i = 0; i < n_iv_uses (data); i++)
2347         bitmap_ior_into (iv_use (data, i)->related_cands,
2348                          data->important_candidates);
2349     }
2350 }
2351
2352 /* Finds the candidates for the induction variables.  */
2353
2354 static void
2355 find_iv_candidates (struct ivopts_data *data)
2356 {
2357   /* Add commonly used ivs.  */
2358   add_standard_iv_candidates (data);
2359
2360   /* Add old induction variables.  */
2361   add_old_ivs_candidates (data);
2362
2363   /* Add induction variables derived from uses.  */
2364   add_derived_ivs_candidates (data);
2365
2366   /* Record the important candidates.  */
2367   record_important_candidates (data);
2368 }
2369
2370 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2371    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2372    we allocate a simple list to every use.  */
2373
2374 static void
2375 alloc_use_cost_map (struct ivopts_data *data)
2376 {
2377   unsigned i, size, s, j;
2378
2379   for (i = 0; i < n_iv_uses (data); i++)
2380     {
2381       struct iv_use *use = iv_use (data, i);
2382       bitmap_iterator bi;
2383
2384       if (data->consider_all_candidates)
2385         size = n_iv_cands (data);
2386       else
2387         {
2388           s = 0;
2389           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2390             {
2391               s++;
2392             }
2393
2394           /* Round up to the power of two, so that moduling by it is fast.  */
2395           for (size = 1; size < s; size <<= 1)
2396             continue;
2397         }
2398
2399       use->n_map_members = size;
2400       use->cost_map = XCNEWVEC (struct cost_pair, size);
2401     }
2402 }
2403
2404 /* Returns description of computation cost of expression whose runtime
2405    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2406
2407 static comp_cost
2408 new_cost (unsigned runtime, unsigned complexity)
2409 {
2410   comp_cost cost;
2411
2412   cost.cost = runtime;
2413   cost.complexity = complexity;
2414
2415   return cost;
2416 }
2417
2418 /* Adds costs COST1 and COST2.  */
2419
2420 static comp_cost
2421 add_costs (comp_cost cost1, comp_cost cost2)
2422 {
2423   cost1.cost += cost2.cost;
2424   cost1.complexity += cost2.complexity;
2425
2426   return cost1;
2427 }
2428 /* Subtracts costs COST1 and COST2.  */
2429
2430 static comp_cost
2431 sub_costs (comp_cost cost1, comp_cost cost2)
2432 {
2433   cost1.cost -= cost2.cost;
2434   cost1.complexity -= cost2.complexity;
2435
2436   return cost1;
2437 }
2438
2439 /* Returns a negative number if COST1 < COST2, a positive number if
2440    COST1 > COST2, and 0 if COST1 = COST2.  */
2441
2442 static int
2443 compare_costs (comp_cost cost1, comp_cost cost2)
2444 {
2445   if (cost1.cost == cost2.cost)
2446     return cost1.complexity - cost2.complexity;
2447
2448   return cost1.cost - cost2.cost;
2449 }
2450
2451 /* Returns true if COST is infinite.  */
2452
2453 static bool
2454 infinite_cost_p (comp_cost cost)
2455 {
2456   return cost.cost == INFTY;
2457 }
2458
2459 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2460    on invariants DEPENDS_ON and that the value used in expressing it
2461    is VALUE.*/
2462
2463 static void
2464 set_use_iv_cost (struct ivopts_data *data,
2465                  struct iv_use *use, struct iv_cand *cand,
2466                  comp_cost cost, bitmap depends_on, tree value)
2467 {
2468   unsigned i, s;
2469
2470   if (infinite_cost_p (cost))
2471     {
2472       BITMAP_FREE (depends_on);
2473       return;
2474     }
2475
2476   if (data->consider_all_candidates)
2477     {
2478       use->cost_map[cand->id].cand = cand;
2479       use->cost_map[cand->id].cost = cost;
2480       use->cost_map[cand->id].depends_on = depends_on;
2481       use->cost_map[cand->id].value = value;
2482       return;
2483     }
2484
2485   /* n_map_members is a power of two, so this computes modulo.  */
2486   s = cand->id & (use->n_map_members - 1);
2487   for (i = s; i < use->n_map_members; i++)
2488     if (!use->cost_map[i].cand)
2489       goto found;
2490   for (i = 0; i < s; i++)
2491     if (!use->cost_map[i].cand)
2492       goto found;
2493
2494   gcc_unreachable ();
2495
2496 found:
2497   use->cost_map[i].cand = cand;
2498   use->cost_map[i].cost = cost;
2499   use->cost_map[i].depends_on = depends_on;
2500   use->cost_map[i].value = value;
2501 }
2502
2503 /* Gets cost of (USE, CANDIDATE) pair.  */
2504
2505 static struct cost_pair *
2506 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2507                  struct iv_cand *cand)
2508 {
2509   unsigned i, s;
2510   struct cost_pair *ret;
2511
2512   if (!cand)
2513     return NULL;
2514
2515   if (data->consider_all_candidates)
2516     {
2517       ret = use->cost_map + cand->id;
2518       if (!ret->cand)
2519         return NULL;
2520
2521       return ret;
2522     }
2523
2524   /* n_map_members is a power of two, so this computes modulo.  */
2525   s = cand->id & (use->n_map_members - 1);
2526   for (i = s; i < use->n_map_members; i++)
2527     if (use->cost_map[i].cand == cand)
2528       return use->cost_map + i;
2529
2530   for (i = 0; i < s; i++)
2531     if (use->cost_map[i].cand == cand)
2532       return use->cost_map + i;
2533
2534   return NULL;
2535 }
2536
2537 /* Returns estimate on cost of computing SEQ.  */
2538
2539 static unsigned
2540 seq_cost (rtx seq)
2541 {
2542   unsigned cost = 0;
2543   rtx set;
2544
2545   for (; seq; seq = NEXT_INSN (seq))
2546     {
2547       set = single_set (seq);
2548       if (set)
2549         cost += rtx_cost (set, SET);
2550       else
2551         cost++;
2552     }
2553
2554   return cost;
2555 }
2556
2557 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2558 static rtx
2559 produce_memory_decl_rtl (tree obj, int *regno)
2560 {
2561   rtx x;
2562
2563   gcc_assert (obj);
2564   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2565     {
2566       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2567       x = gen_rtx_SYMBOL_REF (Pmode, name);
2568       SET_SYMBOL_REF_DECL (x, obj);
2569       x = gen_rtx_MEM (DECL_MODE (obj), x);
2570       targetm.encode_section_info (obj, x, true);
2571     }
2572   else
2573     {
2574       x = gen_raw_REG (Pmode, (*regno)++);
2575       x = gen_rtx_MEM (DECL_MODE (obj), x);
2576     }
2577
2578   return x;
2579 }
2580
2581 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2582    walk_tree.  DATA contains the actual fake register number.  */
2583
2584 static tree
2585 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2586 {
2587   tree obj = NULL_TREE;
2588   rtx x = NULL_RTX;
2589   int *regno = (int *) data;
2590
2591   switch (TREE_CODE (*expr_p))
2592     {
2593     case ADDR_EXPR:
2594       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2595            handled_component_p (*expr_p);
2596            expr_p = &TREE_OPERAND (*expr_p, 0))
2597         continue;
2598       obj = *expr_p;
2599       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2600         x = produce_memory_decl_rtl (obj, regno);
2601       break;
2602
2603     case SSA_NAME:
2604       *ws = 0;
2605       obj = SSA_NAME_VAR (*expr_p);
2606       if (!DECL_RTL_SET_P (obj))
2607         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2608       break;
2609
2610     case VAR_DECL:
2611     case PARM_DECL:
2612     case RESULT_DECL:
2613       *ws = 0;
2614       obj = *expr_p;
2615
2616       if (DECL_RTL_SET_P (obj))
2617         break;
2618
2619       if (DECL_MODE (obj) == BLKmode)
2620         x = produce_memory_decl_rtl (obj, regno);
2621       else
2622         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2623
2624       break;
2625
2626     default:
2627       break;
2628     }
2629
2630   if (x)
2631     {
2632       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2633       SET_DECL_RTL (obj, x);
2634     }
2635
2636   return NULL_TREE;
2637 }
2638
2639 /* Determines cost of the computation of EXPR.  */
2640
2641 static unsigned
2642 computation_cost (tree expr)
2643 {
2644   rtx seq, rslt;
2645   tree type = TREE_TYPE (expr);
2646   unsigned cost;
2647   /* Avoid using hard regs in ways which may be unsupported.  */
2648   int regno = LAST_VIRTUAL_REGISTER + 1;
2649
2650   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2651   start_sequence ();
2652   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2653   seq = get_insns ();
2654   end_sequence ();
2655
2656   cost = seq_cost (seq);
2657   if (MEM_P (rslt))
2658     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2659
2660   return cost;
2661 }
2662
2663 /* Returns variable containing the value of candidate CAND at statement AT.  */
2664
2665 static tree
2666 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2667 {
2668   if (stmt_after_increment (loop, cand, stmt))
2669     return cand->var_after;
2670   else
2671     return cand->var_before;
2672 }
2673
2674 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2675    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2676
2677 int
2678 tree_int_cst_sign_bit (const_tree t)
2679 {
2680   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2681   unsigned HOST_WIDE_INT w;
2682
2683   if (bitno < HOST_BITS_PER_WIDE_INT)
2684     w = TREE_INT_CST_LOW (t);
2685   else
2686     {
2687       w = TREE_INT_CST_HIGH (t);
2688       bitno -= HOST_BITS_PER_WIDE_INT;
2689     }
2690
2691   return (w >> bitno) & 1;
2692 }
2693
2694 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2695    same precision that is at least as wide as the precision of TYPE, stores
2696    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2697    type of A and B.  */
2698
2699 static tree
2700 determine_common_wider_type (tree *a, tree *b)
2701 {
2702   tree wider_type = NULL;
2703   tree suba, subb;
2704   tree atype = TREE_TYPE (*a);
2705
2706   if (CONVERT_EXPR_P (*a))
2707     {
2708       suba = TREE_OPERAND (*a, 0);
2709       wider_type = TREE_TYPE (suba);
2710       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2711         return atype;
2712     }
2713   else
2714     return atype;
2715
2716   if (CONVERT_EXPR_P (*b))
2717     {
2718       subb = TREE_OPERAND (*b, 0);
2719       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2720         return atype;
2721     }
2722   else
2723     return atype;
2724
2725   *a = suba;
2726   *b = subb;
2727   return wider_type;
2728 }
2729
2730 /* Determines the expression by that USE is expressed from induction variable
2731    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2732    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2733
2734 static bool
2735 get_computation_aff (struct loop *loop,
2736                      struct iv_use *use, struct iv_cand *cand, gimple at,
2737                      struct affine_tree_combination *aff)
2738 {
2739   tree ubase = use->iv->base;
2740   tree ustep = use->iv->step;
2741   tree cbase = cand->iv->base;
2742   tree cstep = cand->iv->step, cstep_common;
2743   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2744   tree common_type, var;
2745   tree uutype;
2746   aff_tree cbase_aff, var_aff;
2747   double_int rat;
2748
2749   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2750     {
2751       /* We do not have a precision to express the values of use.  */
2752       return false;
2753     }
2754
2755   var = var_at_stmt (loop, cand, at);
2756   uutype = unsigned_type_for (utype);
2757
2758   /* If the conversion is not noop, perform it.  */
2759   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2760     {
2761       cstep = fold_convert (uutype, cstep);
2762       cbase = fold_convert (uutype, cbase);
2763       var = fold_convert (uutype, var);
2764     }
2765
2766   if (!constant_multiple_of (ustep, cstep, &rat))
2767     return false;
2768
2769   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2770      type, we achieve better folding by computing their difference in this
2771      wider type, and cast the result to UUTYPE.  We do not need to worry about
2772      overflows, as all the arithmetics will in the end be performed in UUTYPE
2773      anyway.  */
2774   common_type = determine_common_wider_type (&ubase, &cbase);
2775
2776   /* use = ubase - ratio * cbase + ratio * var.  */
2777   tree_to_aff_combination (ubase, common_type, aff);
2778   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2779   tree_to_aff_combination (var, uutype, &var_aff);
2780
2781   /* We need to shift the value if we are after the increment.  */
2782   if (stmt_after_increment (loop, cand, at))
2783     {
2784       aff_tree cstep_aff;
2785
2786       if (common_type != uutype)
2787         cstep_common = fold_convert (common_type, cstep);
2788       else
2789         cstep_common = cstep;
2790
2791       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2792       aff_combination_add (&cbase_aff, &cstep_aff);
2793     }
2794
2795   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2796   aff_combination_add (aff, &cbase_aff);
2797   if (common_type != uutype)
2798     aff_combination_convert (aff, uutype);
2799
2800   aff_combination_scale (&var_aff, rat);
2801   aff_combination_add (aff, &var_aff);
2802
2803   return true;
2804 }
2805
2806 /* Determines the expression by that USE is expressed from induction variable
2807    CAND at statement AT in LOOP.  The computation is unshared.  */
2808
2809 static tree
2810 get_computation_at (struct loop *loop,
2811                     struct iv_use *use, struct iv_cand *cand, gimple at)
2812 {
2813   aff_tree aff;
2814   tree type = TREE_TYPE (use->iv->base);
2815
2816   if (!get_computation_aff (loop, use, cand, at, &aff))
2817     return NULL_TREE;
2818   unshare_aff_combination (&aff);
2819   return fold_convert (type, aff_combination_to_tree (&aff));
2820 }
2821
2822 /* Determines the expression by that USE is expressed from induction variable
2823    CAND in LOOP.  The computation is unshared.  */
2824
2825 static tree
2826 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2827 {
2828   return get_computation_at (loop, use, cand, use->stmt);
2829 }
2830
2831 /* Returns cost of addition in MODE.  */
2832
2833 static unsigned
2834 add_cost (enum machine_mode mode)
2835 {
2836   static unsigned costs[NUM_MACHINE_MODES];
2837   rtx seq;
2838   unsigned cost;
2839
2840   if (costs[mode])
2841     return costs[mode];
2842
2843   start_sequence ();
2844   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2845                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2846                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2847                  NULL_RTX);
2848   seq = get_insns ();
2849   end_sequence ();
2850
2851   cost = seq_cost (seq);
2852   if (!cost)
2853     cost = 1;
2854
2855   costs[mode] = cost;
2856
2857   if (dump_file && (dump_flags & TDF_DETAILS))
2858     fprintf (dump_file, "Addition in %s costs %d\n",
2859              GET_MODE_NAME (mode), cost);
2860   return cost;
2861 }
2862
2863 /* Entry in a hashtable of already known costs for multiplication.  */
2864 struct mbc_entry
2865 {
2866   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2867   enum machine_mode mode;       /* In mode.  */
2868   unsigned cost;                /* The cost.  */
2869 };
2870
2871 /* Counts hash value for the ENTRY.  */
2872
2873 static hashval_t
2874 mbc_entry_hash (const void *entry)
2875 {
2876   const struct mbc_entry *e = (const struct mbc_entry *) entry;
2877
2878   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2879 }
2880
2881 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2882
2883 static int
2884 mbc_entry_eq (const void *entry1, const void *entry2)
2885 {
2886   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2887   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2888
2889   return (e1->mode == e2->mode
2890           && e1->cst == e2->cst);
2891 }
2892
2893 /* Returns cost of multiplication by constant CST in MODE.  */
2894
2895 unsigned
2896 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2897 {
2898   static htab_t costs;
2899   struct mbc_entry **cached, act;
2900   rtx seq;
2901   unsigned cost;
2902
2903   if (!costs)
2904     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2905
2906   act.mode = mode;
2907   act.cst = cst;
2908   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2909   if (*cached)
2910     return (*cached)->cost;
2911
2912   *cached = XNEW (struct mbc_entry);
2913   (*cached)->mode = mode;
2914   (*cached)->cst = cst;
2915
2916   start_sequence ();
2917   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2918                gen_int_mode (cst, mode), NULL_RTX, 0);
2919   seq = get_insns ();
2920   end_sequence ();
2921
2922   cost = seq_cost (seq);
2923
2924   if (dump_file && (dump_flags & TDF_DETAILS))
2925     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2926              (int) cst, GET_MODE_NAME (mode), cost);
2927
2928   (*cached)->cost = cost;
2929
2930   return cost;
2931 }
2932
2933 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2934    validity for a memory reference accessing memory of mode MODE.  */
2935
2936 bool
2937 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2938 {
2939 #define MAX_RATIO 128
2940   static sbitmap valid_mult[MAX_MACHINE_MODE];
2941
2942   if (!valid_mult[mode])
2943     {
2944       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2945       rtx addr;
2946       HOST_WIDE_INT i;
2947
2948       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2949       sbitmap_zero (valid_mult[mode]);
2950       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2951       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2952         {
2953           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2954           if (memory_address_p (mode, addr))
2955             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2956         }
2957
2958       if (dump_file && (dump_flags & TDF_DETAILS))
2959         {
2960           fprintf (dump_file, "  allowed multipliers:");
2961           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2962             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2963               fprintf (dump_file, " %d", (int) i);
2964           fprintf (dump_file, "\n");
2965           fprintf (dump_file, "\n");
2966         }
2967     }
2968
2969   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2970     return false;
2971
2972   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2973 }
2974
2975 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2976    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2977    variable is omitted.  Compute the cost for a memory reference that accesses
2978    a memory location of mode MEM_MODE.
2979
2980    TODO -- there must be some better way.  This all is quite crude.  */
2981
2982 static comp_cost
2983 get_address_cost (bool symbol_present, bool var_present,
2984                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2985                   enum machine_mode mem_mode)
2986 {
2987   static bool initialized[MAX_MACHINE_MODE];
2988   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2989   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2990   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2991   unsigned cost, acost, complexity;
2992   bool offset_p, ratio_p;
2993   HOST_WIDE_INT s_offset;
2994   unsigned HOST_WIDE_INT mask;
2995   unsigned bits;
2996
2997   if (!initialized[mem_mode])
2998     {
2999       HOST_WIDE_INT i;
3000       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3001       int old_cse_not_expected;
3002       unsigned sym_p, var_p, off_p, rat_p, add_c;
3003       rtx seq, addr, base;
3004       rtx reg0, reg1;
3005
3006       initialized[mem_mode] = true;
3007
3008       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3009
3010       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3011       for (i = start; i <= 1 << 20; i <<= 1)
3012         {
3013           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3014           if (!memory_address_p (mem_mode, addr))
3015             break;
3016         }
3017       max_offset[mem_mode] = i == start ? 0 : i >> 1;
3018       off[mem_mode] = max_offset[mem_mode];
3019
3020       for (i = start; i <= 1 << 20; i <<= 1)
3021         {
3022           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3023           if (!memory_address_p (mem_mode, addr))
3024             break;
3025         }
3026       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3027
3028       if (dump_file && (dump_flags & TDF_DETAILS))
3029         {
3030           fprintf (dump_file, "get_address_cost:\n");
3031           fprintf (dump_file, "  min offset %s %d\n",
3032                    GET_MODE_NAME (mem_mode),
3033                    (int) min_offset[mem_mode]);
3034           fprintf (dump_file, "  max offset %s %d\n",
3035                    GET_MODE_NAME (mem_mode),
3036                    (int) max_offset[mem_mode]);
3037         }
3038
3039       rat[mem_mode] = 1;
3040       for (i = 2; i <= MAX_RATIO; i++)
3041         if (multiplier_allowed_in_address_p (i, mem_mode))
3042           {
3043             rat[mem_mode] = i;
3044             break;
3045           }
3046
3047       /* Compute the cost of various addressing modes.  */
3048       acost = 0;
3049       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3050       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3051
3052       for (i = 0; i < 16; i++)
3053         {
3054           sym_p = i & 1;
3055           var_p = (i >> 1) & 1;
3056           off_p = (i >> 2) & 1;
3057           rat_p = (i >> 3) & 1;
3058
3059           addr = reg0;
3060           if (rat_p)
3061             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3062                                    gen_int_mode (rat[mem_mode], Pmode));
3063
3064           if (var_p)
3065             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3066
3067           if (sym_p)
3068             {
3069               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3070               /* ??? We can run into trouble with some backends by presenting
3071                  it with symbols which haven't been properly passed through
3072                  targetm.encode_section_info.  By setting the local bit, we
3073                  enhance the probability of things working.  */
3074               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3075
3076               if (off_p)
3077                 base = gen_rtx_fmt_e (CONST, Pmode,
3078                                       gen_rtx_fmt_ee (PLUS, Pmode,
3079                                                       base,
3080                                                       gen_int_mode (off[mem_mode],
3081                                                                     Pmode)));
3082             }
3083           else if (off_p)
3084             base = gen_int_mode (off[mem_mode], Pmode);
3085           else
3086             base = NULL_RTX;
3087
3088           if (base)
3089             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3090
3091           start_sequence ();
3092           /* To avoid splitting addressing modes, pretend that no cse will
3093              follow.  */
3094           old_cse_not_expected = cse_not_expected;
3095           cse_not_expected = true;
3096           addr = memory_address (mem_mode, addr);
3097           cse_not_expected = old_cse_not_expected;
3098           seq = get_insns ();
3099           end_sequence ();
3100
3101           acost = seq_cost (seq);
3102           acost += address_cost (addr, mem_mode);
3103
3104           if (!acost)
3105             acost = 1;
3106           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3107         }
3108
3109       /* On some targets, it is quite expensive to load symbol to a register,
3110          which makes addresses that contain symbols look much more expensive.
3111          However, the symbol will have to be loaded in any case before the
3112          loop (and quite likely we have it in register already), so it does not
3113          make much sense to penalize them too heavily.  So make some final
3114          tweaks for the SYMBOL_PRESENT modes:
3115
3116          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3117          var is cheaper, use this mode with small penalty.
3118          If VAR_PRESENT is true, try whether the mode with
3119          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3120          if this is the case, use it.  */
3121       add_c = add_cost (Pmode);
3122       for (i = 0; i < 8; i++)
3123         {
3124           var_p = i & 1;
3125           off_p = (i >> 1) & 1;
3126           rat_p = (i >> 2) & 1;
3127
3128           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3129           if (var_p)
3130             acost += add_c;
3131
3132           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3133             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3134         }
3135
3136       if (dump_file && (dump_flags & TDF_DETAILS))
3137         {
3138           fprintf (dump_file, "Address costs:\n");
3139
3140           for (i = 0; i < 16; i++)
3141             {
3142               sym_p = i & 1;
3143               var_p = (i >> 1) & 1;
3144               off_p = (i >> 2) & 1;
3145               rat_p = (i >> 3) & 1;
3146
3147               fprintf (dump_file, "  ");
3148               if (sym_p)
3149                 fprintf (dump_file, "sym + ");
3150               if (var_p)
3151                 fprintf (dump_file, "var + ");
3152               if (off_p)
3153                 fprintf (dump_file, "cst + ");
3154               if (rat_p)
3155                 fprintf (dump_file, "rat * ");
3156
3157               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3158               fprintf (dump_file, "index costs %d\n", acost);
3159             }
3160           fprintf (dump_file, "\n");
3161         }
3162     }
3163
3164   bits = GET_MODE_BITSIZE (Pmode);
3165   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3166   offset &= mask;
3167   if ((offset >> (bits - 1) & 1))
3168     offset |= ~mask;
3169   s_offset = offset;
3170
3171   cost = 0;
3172   offset_p = (s_offset != 0
3173               && min_offset[mem_mode] <= s_offset
3174               && s_offset <= max_offset[mem_mode]);
3175   ratio_p = (ratio != 1
3176              && multiplier_allowed_in_address_p (ratio, mem_mode));
3177
3178   if (ratio != 1 && !ratio_p)
3179     cost += multiply_by_cost (ratio, Pmode);
3180
3181   if (s_offset && !offset_p && !symbol_present)
3182     cost += add_cost (Pmode);
3183
3184   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3185   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3186   return new_cost (cost + acost, complexity);
3187 }
3188
3189 /* Estimates cost of forcing expression EXPR into a variable.  */
3190
3191 static comp_cost
3192 force_expr_to_var_cost (tree expr)
3193 {
3194   static bool costs_initialized = false;
3195   static unsigned integer_cost;
3196   static unsigned symbol_cost;
3197   static unsigned address_cost;
3198   tree op0, op1;
3199   comp_cost cost0, cost1, cost;
3200   enum machine_mode mode;
3201
3202   if (!costs_initialized)
3203     {
3204       tree type = build_pointer_type (integer_type_node);
3205       tree var, addr;
3206       rtx x;
3207
3208       var = create_tmp_var_raw (integer_type_node, "test_var");
3209       TREE_STATIC (var) = 1;
3210       x = produce_memory_decl_rtl (var, NULL);
3211       SET_DECL_RTL (var, x);
3212
3213       integer_cost = computation_cost (build_int_cst (integer_type_node,
3214                                                       2000));
3215
3216       addr = build1 (ADDR_EXPR, type, var);
3217       symbol_cost = computation_cost (addr) + 1;
3218
3219       address_cost
3220         = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3221                                     addr,
3222                                     build_int_cst (sizetype, 2000))) + 1;
3223       if (dump_file && (dump_flags & TDF_DETAILS))
3224         {
3225           fprintf (dump_file, "force_expr_to_var_cost:\n");
3226           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3227           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3228           fprintf (dump_file, "  address %d\n", (int) address_cost);
3229           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3230           fprintf (dump_file, "\n");
3231         }
3232
3233       costs_initialized = true;
3234     }
3235
3236   STRIP_NOPS (expr);
3237
3238   if (SSA_VAR_P (expr))
3239     return zero_cost;
3240
3241   if (is_gimple_min_invariant (expr))
3242     {
3243       if (TREE_CODE (expr) == INTEGER_CST)
3244         return new_cost (integer_cost, 0);
3245
3246       if (TREE_CODE (expr) == ADDR_EXPR)
3247         {
3248           tree obj = TREE_OPERAND (expr, 0);
3249
3250           if (TREE_CODE (obj) == VAR_DECL
3251               || TREE_CODE (obj) == PARM_DECL
3252               || TREE_CODE (obj) == RESULT_DECL)
3253             return new_cost (symbol_cost, 0);
3254         }
3255
3256       return new_cost (address_cost, 0);
3257     }
3258
3259   switch (TREE_CODE (expr))
3260     {
3261     case POINTER_PLUS_EXPR:
3262     case PLUS_EXPR:
3263     case MINUS_EXPR:
3264     case MULT_EXPR:
3265       op0 = TREE_OPERAND (expr, 0);
3266       op1 = TREE_OPERAND (expr, 1);
3267       STRIP_NOPS (op0);
3268       STRIP_NOPS (op1);
3269
3270       if (is_gimple_val (op0))
3271         cost0 = zero_cost;
3272       else
3273         cost0 = force_expr_to_var_cost (op0);
3274
3275       if (is_gimple_val (op1))
3276         cost1 = zero_cost;
3277       else
3278         cost1 = force_expr_to_var_cost (op1);
3279
3280       break;
3281
3282     default:
3283       /* Just an arbitrary value, FIXME.  */
3284       return new_cost (target_spill_cost, 0);
3285     }
3286
3287   mode = TYPE_MODE (TREE_TYPE (expr));
3288   switch (TREE_CODE (expr))
3289     {
3290     case POINTER_PLUS_EXPR:
3291     case PLUS_EXPR:
3292     case MINUS_EXPR:
3293       cost = new_cost (add_cost (mode), 0);
3294       break;
3295
3296     case MULT_EXPR:
3297       if (cst_and_fits_in_hwi (op0))
3298         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3299       else if (cst_and_fits_in_hwi (op1))
3300         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3301       else
3302         return new_cost (target_spill_cost, 0);
3303       break;
3304
3305     default:
3306       gcc_unreachable ();
3307     }
3308
3309   cost = add_costs (cost, cost0);
3310   cost = add_costs (cost, cost1);
3311
3312   /* Bound the cost by target_spill_cost.  The parts of complicated
3313      computations often are either loop invariant or at least can
3314      be shared between several iv uses, so letting this grow without
3315      limits would not give reasonable results.  */
3316   if (cost.cost > target_spill_cost)
3317     cost.cost = target_spill_cost;
3318
3319   return cost;
3320 }
3321
3322 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3323    invariants the computation depends on.  */
3324
3325 static comp_cost
3326 force_var_cost (struct ivopts_data *data,
3327                 tree expr, bitmap *depends_on)
3328 {
3329   if (depends_on)
3330     {
3331       fd_ivopts_data = data;
3332       walk_tree (&expr, find_depends, depends_on, NULL);
3333     }
3334
3335   return force_expr_to_var_cost (expr);
3336 }
3337
3338 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3339    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3340    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3341    invariants the computation depends on.  */
3342
3343 static comp_cost
3344 split_address_cost (struct ivopts_data *data,
3345                     tree addr, bool *symbol_present, bool *var_present,
3346                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3347 {
3348   tree core;
3349   HOST_WIDE_INT bitsize;
3350   HOST_WIDE_INT bitpos;
3351   tree toffset;
3352   enum machine_mode mode;
3353   int unsignedp, volatilep;
3354
3355   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3356                               &unsignedp, &volatilep, false);
3357
3358   if (toffset != 0
3359       || bitpos % BITS_PER_UNIT != 0
3360       || TREE_CODE (core) != VAR_DECL)
3361     {
3362       *symbol_present = false;
3363       *var_present = true;
3364       fd_ivopts_data = data;
3365       walk_tree (&addr, find_depends, depends_on, NULL);
3366       return new_cost (target_spill_cost, 0);
3367     }
3368
3369   *offset += bitpos / BITS_PER_UNIT;
3370   if (TREE_STATIC (core)
3371       || DECL_EXTERNAL (core))
3372     {
3373       *symbol_present = true;
3374       *var_present = false;
3375       return zero_cost;
3376     }
3377
3378   *symbol_present = false;
3379   *var_present = true;
3380   return zero_cost;
3381 }
3382
3383 /* Estimates cost of expressing difference of addresses E1 - E2 as
3384    var + symbol + offset.  The value of offset is added to OFFSET,
3385    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3386    part is missing.  DEPENDS_ON is a set of the invariants the computation
3387    depends on.  */
3388
3389 static comp_cost
3390 ptr_difference_cost (struct ivopts_data *data,
3391                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3392                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3393 {
3394   HOST_WIDE_INT diff = 0;
3395   comp_cost cost;
3396
3397   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3398
3399   if (ptr_difference_const (e1, e2, &diff))
3400     {
3401       *offset += diff;
3402       *symbol_present = false;
3403       *var_present = false;
3404       return zero_cost;
3405     }
3406
3407   if (integer_zerop (e2))
3408     return split_address_cost (data, TREE_OPERAND (e1, 0),
3409                                symbol_present, var_present, offset, depends_on);
3410
3411   *symbol_present = false;
3412   *var_present = true;
3413
3414   cost = force_var_cost (data, e1, depends_on);
3415   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3416   cost.cost += add_cost (Pmode);
3417
3418   return cost;
3419 }
3420
3421 /* Estimates cost of expressing difference E1 - E2 as
3422    var + symbol + offset.  The value of offset is added to OFFSET,
3423    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3424    part is missing.  DEPENDS_ON is a set of the invariants the computation
3425    depends on.  */
3426
3427 static comp_cost
3428 difference_cost (struct ivopts_data *data,
3429                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3430                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3431 {
3432   comp_cost cost;
3433   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3434   unsigned HOST_WIDE_INT off1, off2;
3435
3436   e1 = strip_offset (e1, &off1);
3437   e2 = strip_offset (e2, &off2);
3438   *offset += off1 - off2;
3439
3440   STRIP_NOPS (e1);
3441   STRIP_NOPS (e2);
3442
3443   if (TREE_CODE (e1) == ADDR_EXPR)
3444     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3445                                 depends_on);
3446   *symbol_present = false;
3447
3448   if (operand_equal_p (e1, e2, 0))
3449     {
3450       *var_present = false;
3451       return zero_cost;
3452     }
3453   *var_present = true;
3454   if (integer_zerop (e2))
3455     return force_var_cost (data, e1, depends_on);
3456
3457   if (integer_zerop (e1))
3458     {
3459       cost = force_var_cost (data, e2, depends_on);
3460       cost.cost += multiply_by_cost (-1, mode);
3461
3462       return cost;
3463     }
3464
3465   cost = force_var_cost (data, e1, depends_on);
3466   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3467   cost.cost += add_cost (mode);
3468
3469   return cost;
3470 }
3471
3472 /* Determines the cost of the computation by that USE is expressed
3473    from induction variable CAND.  If ADDRESS_P is true, we just need
3474    to create an address from it, otherwise we want to get it into
3475    register.  A set of invariants we depend on is stored in
3476    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3477
3478 static comp_cost
3479 get_computation_cost_at (struct ivopts_data *data,
3480                          struct iv_use *use, struct iv_cand *cand,
3481                          bool address_p, bitmap *depends_on, gimple at)
3482 {
3483   tree ubase = use->iv->base, ustep = use->iv->step;
3484   tree cbase, cstep;
3485   tree utype = TREE_TYPE (ubase), ctype;
3486   unsigned HOST_WIDE_INT cstepi, offset = 0;
3487   HOST_WIDE_INT ratio, aratio;
3488   bool var_present, symbol_present;
3489   comp_cost cost;
3490   unsigned n_sums;
3491   double_int rat;
3492
3493   *depends_on = NULL;
3494
3495   /* Only consider real candidates.  */
3496   if (!cand->iv)
3497     return infinite_cost;
3498
3499   cbase = cand->iv->base;
3500   cstep = cand->iv->step;
3501   ctype = TREE_TYPE (cbase);
3502
3503   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3504     {
3505       /* We do not have a precision to express the values of use.  */
3506       return infinite_cost;
3507     }
3508
3509   if (address_p)
3510     {
3511       /* Do not try to express address of an object with computation based
3512          on address of a different object.  This may cause problems in rtl
3513          level alias analysis (that does not expect this to be happening,
3514          as this is illegal in C), and would be unlikely to be useful
3515          anyway.  */
3516       if (use->iv->base_object
3517           && cand->iv->base_object
3518           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3519         return infinite_cost;
3520     }
3521
3522   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3523     {
3524       /* TODO -- add direct handling of this case.  */
3525       goto fallback;
3526     }
3527
3528   /* CSTEPI is removed from the offset in case statement is after the
3529      increment.  If the step is not constant, we use zero instead.
3530      This is a bit imprecise (there is the extra addition), but
3531      redundancy elimination is likely to transform the code so that
3532      it uses value of the variable before increment anyway,
3533      so it is not that much unrealistic.  */
3534   if (cst_and_fits_in_hwi (cstep))
3535     cstepi = int_cst_value (cstep);
3536   else
3537     cstepi = 0;
3538
3539   if (!constant_multiple_of (ustep, cstep, &rat))
3540     return infinite_cost;
3541
3542   if (double_int_fits_in_shwi_p (rat))
3543     ratio = double_int_to_shwi (rat);
3544   else
3545     return infinite_cost;
3546
3547   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3548      or ratio == 1, it is better to handle this like
3549
3550      ubase - ratio * cbase + ratio * var
3551
3552      (also holds in the case ratio == -1, TODO.  */
3553
3554   if (cst_and_fits_in_hwi (cbase))
3555     {
3556       offset = - ratio * int_cst_value (cbase);
3557       cost = difference_cost (data,
3558                               ubase, build_int_cst (utype, 0),
3559                               &symbol_present, &var_present, &offset,
3560                               depends_on);
3561     }
3562   else if (ratio == 1)
3563     {
3564       cost = difference_cost (data,
3565                               ubase, cbase,
3566                               &symbol_present, &var_present, &offset,
3567                               depends_on);
3568     }
3569   else
3570     {
3571       cost = force_var_cost (data, cbase, depends_on);
3572       cost.cost += add_cost (TYPE_MODE (ctype));
3573       cost = add_costs (cost,
3574                         difference_cost (data,
3575                                          ubase, build_int_cst (utype, 0),
3576                                          &symbol_present, &var_present,
3577                                          &offset, depends_on));
3578     }
3579
3580   /* If we are after the increment, the value of the candidate is higher by
3581      one iteration.  */
3582   if (stmt_after_increment (data->current_loop, cand, at))
3583     offset -= ratio * cstepi;
3584
3585   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3586      (symbol/var/const parts may be omitted).  If we are looking for an address,
3587      find the cost of addressing this.  */
3588   if (address_p)
3589     return add_costs (cost, get_address_cost (symbol_present, var_present,
3590                                 offset, ratio,
3591                                 TYPE_MODE (TREE_TYPE (*use->op_p))));
3592
3593   /* Otherwise estimate the costs for computing the expression.  */
3594   aratio = ratio > 0 ? ratio : -ratio;
3595   if (!symbol_present && !var_present && !offset)
3596     {
3597       if (ratio != 1)
3598         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3599
3600       return cost;
3601     }
3602
3603   if (aratio != 1)
3604     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3605
3606   n_sums = 1;
3607   if (var_present
3608       /* Symbol + offset should be compile-time computable.  */
3609       && (symbol_present || offset))
3610     n_sums++;
3611
3612   /* Having offset does not affect runtime cost in case it is added to
3613      symbol, but it increases complexity.  */
3614   if (offset)
3615     cost.complexity++;
3616
3617   cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3618   return cost;
3619
3620 fallback:
3621   {
3622     /* Just get the expression, expand it and measure the cost.  */
3623     tree comp = get_computation_at (data->current_loop, use, cand, at);
3624
3625     if (!comp)
3626       return infinite_cost;
3627
3628     if (address_p)
3629       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3630
3631     return new_cost (computation_cost (comp), 0);
3632   }
3633 }
3634
3635 /* Determines the cost of the computation by that USE is expressed
3636    from induction variable CAND.  If ADDRESS_P is true, we just need
3637    to create an address from it, otherwise we want to get it into
3638    register.  A set of invariants we depend on is stored in
3639    DEPENDS_ON.  */
3640
3641 static comp_cost
3642 get_computation_cost (struct ivopts_data *data,
3643                       struct iv_use *use, struct iv_cand *cand,
3644                       bool address_p, bitmap *depends_on)
3645 {
3646   return get_computation_cost_at (data,
3647                                   use, cand, address_p, depends_on, use->stmt);
3648 }
3649
3650 /* Determines cost of basing replacement of USE on CAND in a generic
3651    expression.  */
3652
3653 static bool
3654 determine_use_iv_cost_generic (struct ivopts_data *data,
3655                                struct iv_use *use, struct iv_cand *cand)
3656 {
3657   bitmap depends_on;
3658   comp_cost cost;
3659
3660   /* The simple case first -- if we need to express value of the preserved
3661      original biv, the cost is 0.  This also prevents us from counting the
3662      cost of increment twice -- once at this use and once in the cost of
3663      the candidate.  */
3664   if (cand->pos == IP_ORIGINAL
3665       && cand->incremented_at == use->stmt)
3666     {
3667       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3668       return true;
3669     }
3670
3671   cost = get_computation_cost (data, use, cand, false, &depends_on);
3672   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3673
3674   return !infinite_cost_p (cost);
3675 }
3676
3677 /* Determines cost of basing replacement of USE on CAND in an address.  */
3678
3679 static bool
3680 determine_use_iv_cost_address (struct ivopts_data *data,
3681                                struct iv_use *use, struct iv_cand *cand)
3682 {
3683   bitmap depends_on;
3684   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3685
3686   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3687
3688   return !infinite_cost_p (cost);
3689 }
3690
3691 /* Computes value of candidate CAND at position AT in iteration NITER, and
3692    stores it to VAL.  */
3693
3694 static void
3695 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3696                aff_tree *val)
3697 {
3698   aff_tree step, delta, nit;
3699   struct iv *iv = cand->iv;
3700   tree type = TREE_TYPE (iv->base);
3701   tree steptype = type;
3702   if (POINTER_TYPE_P (type))
3703     steptype = sizetype;
3704
3705   tree_to_aff_combination (iv->step, steptype, &step);
3706   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3707   aff_combination_convert (&nit, steptype);
3708   aff_combination_mult (&nit, &step, &delta);
3709   if (stmt_after_increment (loop, cand, at))
3710     aff_combination_add (&delta, &step);
3711
3712   tree_to_aff_combination (iv->base, type, val);
3713   aff_combination_add (val, &delta);
3714 }
3715
3716 /* Returns period of induction variable iv.  */
3717
3718 static tree
3719 iv_period (struct iv *iv)
3720 {
3721   tree step = iv->step, period, type;
3722   tree pow2div;
3723
3724   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3725
3726   /* Period of the iv is gcd (step, type range).  Since type range is power
3727      of two, it suffices to determine the maximum power of two that divides
3728      step.  */
3729   pow2div = num_ending_zeros (step);
3730   type = unsigned_type_for (TREE_TYPE (step));
3731
3732   period = build_low_bits_mask (type,
3733                                 (TYPE_PRECISION (type)
3734                                  - tree_low_cst (pow2div, 1)));
3735
3736   return period;
3737 }
3738
3739 /* Returns the comparison operator used when eliminating the iv USE.  */
3740
3741 static enum tree_code
3742 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3743 {
3744   struct loop *loop = data->current_loop;
3745   basic_block ex_bb;
3746   edge exit;
3747
3748   ex_bb = gimple_bb (use->stmt);
3749   exit = EDGE_SUCC (ex_bb, 0);
3750   if (flow_bb_inside_loop_p (loop, exit->dest))
3751     exit = EDGE_SUCC (ex_bb, 1);
3752
3753   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3754 }
3755
3756 /* Check whether it is possible to express the condition in USE by comparison
3757    of candidate CAND.  If so, store the value compared with to BOUND.  */
3758
3759 static bool
3760 may_eliminate_iv (struct ivopts_data *data,
3761                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3762 {
3763   basic_block ex_bb;
3764   edge exit;
3765   tree nit, period;
3766   struct loop *loop = data->current_loop;
3767   aff_tree bnd;
3768
3769   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3770     return false;
3771
3772   /* For now works only for exits that dominate the loop latch.
3773      TODO: extend to other conditions inside loop body.  */
3774   ex_bb = gimple_bb (use->stmt);
3775   if (use->stmt != last_stmt (ex_bb)
3776       || gimple_code (use->stmt) != GIMPLE_COND
3777       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3778     return false;
3779
3780   exit = EDGE_SUCC (ex_bb, 0);
3781   if (flow_bb_inside_loop_p (loop, exit->dest))
3782     exit = EDGE_SUCC (ex_bb, 1);
3783   if (flow_bb_inside_loop_p (loop, exit->dest))
3784     return false;
3785
3786   nit = niter_for_exit (data, exit);
3787   if (!nit)
3788     return false;
3789
3790   /* Determine whether we can use the variable to test the exit condition.
3791      This is the case iff the period of the induction variable is greater
3792      than the number of iterations for which the exit condition is true.  */
3793   period = iv_period (cand->iv);
3794
3795   /* If the number of iterations is constant, compare against it directly.  */
3796   if (TREE_CODE (nit) == INTEGER_CST)
3797     {
3798       if (!tree_int_cst_lt (nit, period))
3799         return false;
3800     }
3801
3802   /* If not, and if this is the only possible exit of the loop, see whether
3803      we can get a conservative estimate on the number of iterations of the
3804      entire loop and compare against that instead.  */
3805   else if (loop_only_exit_p (loop, exit))
3806     {
3807       double_int period_value, max_niter;
3808       if (!estimated_loop_iterations (loop, true, &max_niter))
3809         return false;
3810       period_value = tree_to_double_int (period);
3811       if (double_int_ucmp (max_niter, period_value) >= 0)
3812         return false;
3813     }
3814
3815   /* Otherwise, punt.  */
3816   else
3817     return false;
3818
3819   cand_value_at (loop, cand, use->stmt, nit, &bnd);
3820   *bound = aff_combination_to_tree (&bnd);
3821   return true;
3822 }
3823
3824 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3825
3826 static bool
3827 determine_use_iv_cost_condition (struct ivopts_data *data,
3828                                  struct iv_use *use, struct iv_cand *cand)
3829 {
3830   tree bound = NULL_TREE;
3831   struct iv *cmp_iv;
3832   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3833   comp_cost elim_cost, express_cost, cost;
3834   bool ok;
3835
3836   /* Only consider real candidates.  */
3837   if (!cand->iv)
3838     {
3839       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3840       return false;
3841     }
3842
3843   /* Try iv elimination.  */
3844   if (may_eliminate_iv (data, use, cand, &bound))
3845     {
3846       elim_cost = force_var_cost (data, bound, &depends_on_elim);
3847       /* The bound is a loop invariant, so it will be only computed
3848          once.  */
3849       elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3850     }
3851   else
3852     elim_cost = infinite_cost;
3853
3854   /* Try expressing the original giv.  If it is compared with an invariant,
3855      note that we cannot get rid of it.  */
3856   ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
3857   gcc_assert (ok);
3858
3859   express_cost = get_computation_cost (data, use, cand, false,
3860                                        &depends_on_express);
3861   fd_ivopts_data = data;
3862   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3863
3864   /* Choose the better approach.  */
3865   if (compare_costs (elim_cost, express_cost) < 0)
3866     {
3867       cost = elim_cost;
3868       depends_on = depends_on_elim;
3869       depends_on_elim = NULL;
3870     }
3871   else
3872     {
3873       cost = express_cost;
3874       depends_on = depends_on_express;
3875       depends_on_express = NULL;
3876       bound = NULL_TREE;
3877     }
3878
3879   set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3880
3881   if (depends_on_elim)
3882     BITMAP_FREE (depends_on_elim);
3883   if (depends_on_express)
3884     BITMAP_FREE (depends_on_express);
3885
3886   return !infinite_cost_p (cost);
3887 }
3888
3889 /* Determines cost of basing replacement of USE on CAND.  Returns false
3890    if USE cannot be based on CAND.  */
3891
3892 static bool
3893 determine_use_iv_cost (struct ivopts_data *data,
3894                        struct iv_use *use, struct iv_cand *cand)
3895 {
3896   switch (use->type)
3897     {
3898     case USE_NONLINEAR_EXPR:
3899       return determine_use_iv_cost_generic (data, use, cand);
3900
3901     case USE_ADDRESS:
3902       return determine_use_iv_cost_address (data, use, cand);
3903
3904     case USE_COMPARE:
3905       return determine_use_iv_cost_condition (data, use, cand);
3906
3907     default:
3908       gcc_unreachable ();
3909     }
3910 }
3911
3912 /* Determines costs of basing the use of the iv on an iv candidate.  */
3913
3914 static void
3915 determine_use_iv_costs (struct ivopts_data *data)
3916 {
3917   unsigned i, j;
3918   struct iv_use *use;
3919   struct iv_cand *cand;
3920   bitmap to_clear = BITMAP_ALLOC (NULL);
3921
3922   alloc_use_cost_map (data);
3923
3924   for (i = 0; i < n_iv_uses (data); i++)
3925     {
3926       use = iv_use (data, i);
3927
3928       if (data->consider_all_candidates)
3929         {
3930           for (j = 0; j < n_iv_cands (data); j++)
3931             {
3932               cand = iv_cand (data, j);
3933               determine_use_iv_cost (data, use, cand);
3934             }
3935         }
3936       else
3937         {
3938           bitmap_iterator bi;
3939
3940           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3941             {
3942               cand = iv_cand (data, j);
3943               if (!determine_use_iv_cost (data, use, cand))
3944                 bitmap_set_bit (to_clear, j);
3945             }
3946
3947           /* Remove the candidates for that the cost is infinite from
3948              the list of related candidates.  */
3949           bitmap_and_compl_into (use->related_cands, to_clear);
3950           bitmap_clear (to_clear);
3951         }
3952     }
3953
3954   BITMAP_FREE (to_clear);
3955
3956   if (dump_file && (dump_flags & TDF_DETAILS))
3957     {
3958       fprintf (dump_file, "Use-candidate costs:\n");
3959
3960       for (i = 0; i < n_iv_uses (data); i++)
3961         {
3962           use = iv_use (data, i);
3963
3964           fprintf (dump_file, "Use %d:\n", i);
3965           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
3966           for (j = 0; j < use->n_map_members; j++)
3967             {
3968               if (!use->cost_map[j].cand
3969                   || infinite_cost_p (use->cost_map[j].cost))
3970                 continue;
3971
3972               fprintf (dump_file, "  %d\t%d\t%d\t",
3973                        use->cost_map[j].cand->id,
3974                        use->cost_map[j].cost.cost,
3975                        use->cost_map[j].cost.complexity);
3976               if (use->cost_map[j].depends_on)
3977                 bitmap_print (dump_file,
3978                               use->cost_map[j].depends_on, "","");
3979               fprintf (dump_file, "\n");
3980             }
3981
3982           fprintf (dump_file, "\n");
3983         }
3984       fprintf (dump_file, "\n");
3985     }
3986 }
3987
3988 /* Determines cost of the candidate CAND.  */
3989
3990 static void
3991 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3992 {
3993   comp_cost cost_base;
3994   unsigned cost, cost_step;
3995   tree base;
3996
3997   if (!cand->iv)
3998     {
3999       cand->cost = 0;
4000       return;
4001     }
4002
4003   /* There are two costs associated with the candidate -- its increment
4004      and its initialization.  The second is almost negligible for any loop
4005      that rolls enough, so we take it just very little into account.  */
4006
4007   base = cand->iv->base;
4008   cost_base = force_var_cost (data, base, NULL);
4009   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4010
4011   cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4012
4013   /* Prefer the original ivs unless we may gain something by replacing it.
4014      The reason is to make debugging simpler; so this is not relevant for
4015      artificial ivs created by other optimization passes.  */
4016   if (cand->pos != IP_ORIGINAL
4017       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4018     cost++;
4019
4020   /* Prefer not to insert statements into latch unless there are some
4021      already (so that we do not create unnecessary jumps).  */
4022   if (cand->pos == IP_END
4023       && empty_block_p (ip_end_pos (data->current_loop)))
4024     cost++;
4025
4026   cand->cost = cost;
4027 }
4028
4029 /* Determines costs of computation of the candidates.  */
4030
4031 static void
4032 determine_iv_costs (struct ivopts_data *data)
4033 {
4034   unsigned i;
4035
4036   if (dump_file && (dump_flags & TDF_DETAILS))
4037     {
4038       fprintf (dump_file, "Candidate costs:\n");
4039       fprintf (dump_file, "  cand\tcost\n");
4040     }
4041
4042   for (i = 0; i < n_iv_cands (data); i++)
4043     {
4044       struct iv_cand *cand = iv_cand (data, i);
4045
4046       determine_iv_cost (data, cand);
4047
4048       if (dump_file && (dump_flags & TDF_DETAILS))
4049         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4050     }
4051
4052   if (dump_file && (dump_flags & TDF_DETAILS))
4053     fprintf (dump_file, "\n");
4054 }
4055
4056 /* Calculates cost for having SIZE induction variables.  */
4057
4058 static unsigned
4059 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4060 {
4061   /* We add size to the cost, so that we prefer eliminating ivs
4062      if possible.  */
4063   return size + estimate_reg_pressure_cost (size, data->regs_used);
4064 }
4065
4066 /* For each size of the induction variable set determine the penalty.  */
4067
4068 static void
4069 determine_set_costs (struct ivopts_data *data)
4070 {
4071   unsigned j, n;
4072   gimple phi;
4073   gimple_stmt_iterator psi;
4074   tree op;
4075   struct loop *loop = data->current_loop;
4076   bitmap_iterator bi;
4077
4078   /* We use the following model (definitely improvable, especially the
4079      cost function -- TODO):
4080
4081      We estimate the number of registers available (using MD data), name it A.
4082
4083      We estimate the number of registers used by the loop, name it U.  This
4084      number is obtained as the number of loop phi nodes (not counting virtual
4085      registers and bivs) + the number of variables from outside of the loop.
4086
4087      We set a reserve R (free regs that are used for temporary computations,
4088      etc.).  For now the reserve is a constant 3.
4089
4090      Let I be the number of induction variables.
4091
4092      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4093         make a lot of ivs without a reason).
4094      -- if A - R < U + I <= A, the cost is I * PRES_COST
4095      -- if U + I > A, the cost is I * PRES_COST and
4096         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4097
4098   if (dump_file && (dump_flags & TDF_DETAILS))
4099     {
4100       fprintf (dump_file, "Global costs:\n");
4101       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4102       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost);
4103       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4104     }
4105
4106   n = 0;
4107   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4108     {
4109       phi = gsi_stmt (psi);
4110       op = PHI_RESULT (phi);
4111
4112       if (!is_gimple_reg (op))
4113         continue;
4114
4115       if (get_iv (data, op))
4116         continue;
4117
4118       n++;
4119     }
4120
4121   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4122     {
4123       struct version_info *info = ver_info (data, j);
4124
4125       if (info->inv_id && info->has_nonlin_use)
4126         n++;
4127     }
4128
4129   data->regs_used = n;
4130   if (dump_file && (dump_flags & TDF_DETAILS))
4131     fprintf (dump_file, "  regs_used %d\n", n);
4132
4133   if (dump_file && (dump_flags & TDF_DETAILS))
4134     {
4135       fprintf (dump_file, "  cost for size:\n");
4136       fprintf (dump_file, "  ivs\tcost\n");
4137       for (j = 0; j <= 2 * target_avail_regs; j++)
4138         fprintf (dump_file, "  %d\t%d\n", j,
4139                  ivopts_global_cost_for_size (data, j));
4140       fprintf (dump_file, "\n");
4141     }
4142 }
4143
4144 /* Returns true if A is a cheaper cost pair than B.  */
4145
4146 static bool
4147 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4148 {
4149   int cmp;
4150
4151   if (!a)
4152     return false;
4153
4154   if (!b)
4155     return true;
4156
4157   cmp = compare_costs (a->cost, b->cost);
4158   if (cmp < 0)
4159     return true;
4160
4161   if (cmp > 0)
4162     return false;
4163
4164   /* In case the costs are the same, prefer the cheaper candidate.  */
4165   if (a->cand->cost < b->cand->cost)
4166     return true;
4167
4168   return false;
4169 }
4170
4171 /* Computes the cost field of IVS structure.  */
4172
4173 static void
4174 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4175 {
4176   comp_cost cost = ivs->cand_use_cost;
4177   cost.cost += ivs->cand_cost;
4178   cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4179
4180   ivs->cost = cost;
4181 }
4182
4183 /* Remove invariants in set INVS to set IVS.  */
4184
4185 static void
4186 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4187 {
4188   bitmap_iterator bi;
4189   unsigned iid;
4190
4191   if (!invs)
4192     return;
4193
4194   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4195     {
4196       ivs->n_invariant_uses[iid]--;
4197       if (ivs->n_invariant_uses[iid] == 0)
4198         ivs->n_regs--;
4199     }
4200 }
4201
4202 /* Set USE not to be expressed by any candidate in IVS.  */
4203
4204 static void
4205 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4206                  struct iv_use *use)
4207 {
4208   unsigned uid = use->id, cid;
4209   struct cost_pair *cp;
4210
4211   cp = ivs->cand_for_use[uid];
4212   if (!cp)
4213     return;
4214   cid = cp->cand->id;
4215
4216   ivs->bad_uses++;
4217   ivs->cand_for_use[uid] = NULL;
4218   ivs->n_cand_uses[cid]--;
4219
4220   if (ivs->n_cand_uses[cid] == 0)
4221     {
4222       bitmap_clear_bit (ivs->cands, cid);
4223       /* Do not count the pseudocandidates.  */
4224       if (cp->cand->iv)
4225         ivs->n_regs--;
4226       ivs->n_cands--;
4227       ivs->cand_cost -= cp->cand->cost;
4228
4229       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4230     }
4231
4232   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4233
4234   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4235   iv_ca_recount_cost (data, ivs);
4236 }
4237
4238 /* Add invariants in set INVS to set IVS.  */
4239
4240 static void
4241 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4242 {
4243   bitmap_iterator bi;
4244   unsigned iid;
4245
4246   if (!invs)
4247     return;
4248
4249   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4250     {
4251       ivs->n_invariant_uses[iid]++;
4252       if (ivs->n_invariant_uses[iid] == 1)
4253         ivs->n_regs++;
4254     }
4255 }
4256
4257 /* Set cost pair for USE in set IVS to CP.  */
4258
4259 static void
4260 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4261               struct iv_use *use, struct cost_pair *cp)
4262 {
4263   unsigned uid = use->id, cid;
4264
4265   if (ivs->cand_for_use[uid] == cp)
4266     return;
4267
4268   if (ivs->cand_for_use[uid])
4269     iv_ca_set_no_cp (data, ivs, use);
4270
4271   if (cp)
4272     {
4273       cid = cp->cand->id;
4274
4275       ivs->bad_uses--;
4276       ivs->cand_for_use[uid] = cp;
4277       ivs->n_cand_uses[cid]++;
4278       if (ivs->n_cand_uses[cid] == 1)
4279         {
4280           bitmap_set_bit (ivs->cands, cid);
4281           /* Do not count the pseudocandidates.  */
4282           if (cp->cand->iv)
4283             ivs->n_regs++;
4284           ivs->n_cands++;
4285           ivs->cand_cost += cp->cand->cost;
4286
4287           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4288         }
4289
4290       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4291       iv_ca_set_add_invariants (ivs, cp->depends_on);
4292       iv_ca_recount_cost (data, ivs);
4293     }
4294 }
4295
4296 /* Extend set IVS by expressing USE by some of the candidates in it
4297    if possible.  */
4298
4299 static void
4300 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4301                struct iv_use *use)
4302 {
4303   struct cost_pair *best_cp = NULL, *cp;
4304   bitmap_iterator bi;
4305   unsigned i;
4306
4307   gcc_assert (ivs->upto >= use->id);
4308
4309   if (ivs->upto == use->id)
4310     {
4311       ivs->upto++;
4312       ivs->bad_uses++;
4313     }
4314
4315   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4316     {
4317       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4318
4319       if (cheaper_cost_pair (cp, best_cp))
4320         best_cp = cp;
4321     }
4322
4323   iv_ca_set_cp (data, ivs, use, best_cp);
4324 }
4325
4326 /* Get cost for assignment IVS.  */
4327
4328 static comp_cost
4329 iv_ca_cost (struct iv_ca *ivs)
4330 {
4331   return (ivs->bad_uses ? infinite_cost : ivs->cost);
4332 }
4333
4334 /* Returns true if all dependences of CP are among invariants in IVS.  */
4335
4336 static bool
4337 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4338 {
4339   unsigned i;
4340   bitmap_iterator bi;
4341
4342   if (!cp->depends_on)
4343     return true;
4344
4345   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4346     {
4347       if (ivs->n_invariant_uses[i] == 0)
4348         return false;
4349     }
4350
4351   return true;
4352 }
4353
4354 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4355    it before NEXT_CHANGE.  */
4356
4357 static struct iv_ca_delta *
4358 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4359                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4360 {
4361   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4362
4363   change->use = use;
4364   change->old_cp = old_cp;
4365   change->new_cp = new_cp;
4366   change->next_change = next_change;
4367
4368   return change;
4369 }
4370
4371 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4372    are rewritten.  */
4373
4374 static struct iv_ca_delta *
4375 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4376 {
4377   struct iv_ca_delta *last;
4378
4379   if (!l2)
4380     return l1;
4381
4382   if (!l1)
4383     return l2;
4384
4385   for (last = l1; last->next_change; last = last->next_change)
4386     continue;
4387   last->next_change = l2;
4388
4389   return l1;
4390 }
4391
4392 /* Returns candidate by that USE is expressed in IVS.  */
4393
4394 static struct cost_pair *
4395 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4396 {
4397   return ivs->cand_for_use[use->id];
4398 }
4399
4400 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4401
4402 static struct iv_ca_delta *
4403 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4404 {
4405   struct iv_ca_delta *act, *next, *prev = NULL;
4406   struct cost_pair *tmp;
4407
4408   for (act = delta; act; act = next)
4409     {
4410       next = act->next_change;
4411       act->next_change = prev;
4412       prev = act;
4413
4414       tmp = act->old_cp;
4415       act->old_cp = act->new_cp;
4416       act->new_cp = tmp;
4417     }
4418
4419   return prev;
4420 }
4421
4422 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4423    reverted instead.  */
4424
4425 static void
4426 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4427                     struct iv_ca_delta *delta, bool forward)
4428 {
4429   struct cost_pair *from, *to;
4430   struct iv_ca_delta *act;
4431
4432   if (!forward)
4433     delta = iv_ca_delta_reverse (delta);
4434
4435   for (act = delta; act; act = act->next_change)
4436     {
4437       from = act->old_cp;
4438       to = act->new_cp;
4439       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4440       iv_ca_set_cp (data, ivs, act->use, to);
4441     }
4442
4443   if (!forward)
4444     iv_ca_delta_reverse (delta);
4445 }
4446
4447 /* Returns true if CAND is used in IVS.  */
4448
4449 static bool
4450 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4451 {
4452   return ivs->n_cand_uses[cand->id] > 0;
4453 }
4454
4455 /* Returns number of induction variable candidates in the set IVS.  */
4456
4457 static unsigned
4458 iv_ca_n_cands (struct iv_ca *ivs)
4459 {
4460   return ivs->n_cands;
4461 }
4462
4463 /* Free the list of changes DELTA.  */
4464
4465 static void
4466 iv_ca_delta_free (struct iv_ca_delta **delta)
4467 {
4468   struct iv_ca_delta *act, *next;
4469
4470   for (act = *delta; act; act = next)
4471     {
4472       next = act->next_change;
4473       free (act);
4474     }
4475
4476   *delta = NULL;
4477 }
4478
4479 /* Allocates new iv candidates assignment.  */
4480
4481 static struct iv_ca *
4482 iv_ca_new (struct ivopts_data *data)
4483 {
4484   struct iv_ca *nw = XNEW (struct iv_ca);
4485
4486   nw->upto = 0;
4487   nw->bad_uses = 0;
4488   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4489   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4490   nw->cands = BITMAP_ALLOC (NULL);
4491   nw->n_cands = 0;
4492   nw->n_regs = 0;
4493   nw->cand_use_cost = zero_cost;
4494   nw->cand_cost = 0;
4495   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4496   nw->cost = zero_cost;
4497
4498   return nw;
4499 }
4500
4501 /* Free memory occupied by the set IVS.  */
4502
4503 static void
4504 iv_ca_free (struct iv_ca **ivs)
4505 {
4506   free ((*ivs)->cand_for_use);
4507   free ((*ivs)->n_cand_uses);
4508   BITMAP_FREE ((*ivs)->cands);
4509   free ((*ivs)->n_invariant_uses);
4510   free (*ivs);
4511   *ivs = NULL;
4512 }
4513
4514 /* Dumps IVS to FILE.  */
4515
4516 static void
4517 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4518 {
4519   const char *pref = "  invariants ";
4520   unsigned i;
4521   comp_cost cost = iv_ca_cost (ivs);
4522
4523   fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
4524   bitmap_print (file, ivs->cands, "  candidates ","\n");
4525
4526   for (i = 1; i <= data->max_inv_id; i++)
4527     if (ivs->n_invariant_uses[i])
4528       {
4529         fprintf (file, "%s%d", pref, i);
4530         pref = ", ";
4531       }
4532   fprintf (file, "\n");
4533 }
4534
4535 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4536    new set, and store differences in DELTA.  Number of induction variables
4537    in the new set is stored to N_IVS.  */
4538
4539 static comp_cost
4540 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4541               struct iv_cand *cand, struct iv_ca_delta **delta,
4542               unsigned *n_ivs)
4543 {
4544   unsigned i;
4545   comp_cost cost;
4546   struct iv_use *use;
4547   struct cost_pair *old_cp, *new_cp;
4548
4549   *delta = NULL;
4550   for (i = 0; i < ivs->upto; i++)
4551     {
4552       use = iv_use (data, i);
4553       old_cp = iv_ca_cand_for_use (ivs, use);
4554
4555       if (old_cp
4556           && old_cp->cand == cand)
4557         continue;
4558
4559       new_cp = get_use_iv_cost (data, use, cand);
4560       if (!new_cp)
4561         continue;
4562
4563       if (!iv_ca_has_deps (ivs, new_cp))
4564         continue;
4565
4566       if (!cheaper_cost_pair (new_cp, old_cp))
4567         continue;
4568
4569       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4570     }
4571
4572   iv_ca_delta_commit (data, ivs, *delta, true);
4573   cost = iv_ca_cost (ivs);
4574   if (n_ivs)
4575     *n_ivs = iv_ca_n_cands (ivs);
4576   iv_ca_delta_commit (data, ivs, *delta, false);
4577
4578   return cost;
4579 }
4580
4581 /* Try narrowing set IVS by removing CAND.  Return the cost of
4582    the new set and store the differences in DELTA.  */
4583
4584 static comp_cost
4585 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4586               struct iv_cand *cand, struct iv_ca_delta **delta)
4587 {
4588   unsigned i, ci;
4589   struct iv_use *use;
4590   struct cost_pair *old_cp, *new_cp, *cp;
4591   bitmap_iterator bi;
4592   struct iv_cand *cnd;
4593   comp_cost cost;
4594
4595   *delta = NULL;
4596   for (i = 0; i < n_iv_uses (data); i++)
4597     {
4598       use = iv_use (data, i);
4599
4600       old_cp = iv_ca_cand_for_use (ivs, use);
4601       if (old_cp->cand != cand)
4602         continue;
4603
4604       new_cp = NULL;
4605
4606       if (data->consider_all_candidates)
4607         {
4608           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4609             {
4610               if (ci == cand->id)
4611                 continue;
4612
4613               cnd = iv_cand (data, ci);
4614
4615               cp = get_use_iv_cost (data, use, cnd);
4616               if (!cp)
4617                 continue;
4618               if (!iv_ca_has_deps (ivs, cp))
4619                 continue;
4620
4621               if (!cheaper_cost_pair (cp, new_cp))
4622                 continue;
4623
4624               new_cp = cp;
4625             }
4626         }
4627       else
4628         {
4629           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4630             {
4631               if (ci == cand->id)
4632                 continue;
4633
4634               cnd = iv_cand (data, ci);
4635
4636               cp = get_use_iv_cost (data, use, cnd);
4637               if (!cp)
4638                 continue;
4639               if (!iv_ca_has_deps (ivs, cp))
4640                 continue;
4641
4642               if (!cheaper_cost_pair (cp, new_cp))
4643                 continue;
4644
4645               new_cp = cp;
4646             }
4647         }
4648
4649       if (!new_cp)
4650         {
4651           iv_ca_delta_free (delta);
4652           return infinite_cost;
4653         }
4654
4655       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4656     }
4657
4658   iv_ca_delta_commit (data, ivs, *delta, true);
4659   cost = iv_ca_cost (ivs);
4660   iv_ca_delta_commit (data, ivs, *delta, false);
4661
4662   return cost;
4663 }
4664
4665 /* Try optimizing the set of candidates IVS by removing candidates different
4666    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4667    differences in DELTA.  */
4668
4669 static comp_cost
4670 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4671              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4672 {
4673   bitmap_iterator bi;
4674   struct iv_ca_delta *act_delta, *best_delta;
4675   unsigned i;
4676   comp_cost best_cost, acost;
4677   struct iv_cand *cand;
4678
4679   best_delta = NULL;
4680   best_cost = iv_ca_cost (ivs);
4681
4682   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4683     {
4684       cand = iv_cand (data, i);
4685
4686       if (cand == except_cand)
4687         continue;
4688
4689       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4690
4691       if (compare_costs (acost, best_cost) < 0)
4692         {
4693           best_cost = acost;
4694           iv_ca_delta_free (&best_delta);
4695           best_delta = act_delta;
4696         }
4697       else
4698         iv_ca_delta_free (&act_delta);
4699     }
4700
4701   if (!best_delta)
4702     {
4703       *delta = NULL;
4704       return best_cost;
4705     }
4706
4707   /* Recurse to possibly remove other unnecessary ivs.  */
4708   iv_ca_delta_commit (data, ivs, best_delta, true);
4709   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4710   iv_ca_delta_commit (data, ivs, best_delta, false);
4711   *delta = iv_ca_delta_join (best_delta, *delta);
4712   return best_cost;
4713 }
4714
4715 /* Tries to extend the sets IVS in the best possible way in order
4716    to express the USE.  */
4717
4718 static bool
4719 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4720                   struct iv_use *use)
4721 {
4722   comp_cost best_cost, act_cost;
4723   unsigned i;
4724   bitmap_iterator bi;
4725   struct iv_cand *cand;
4726   struct iv_ca_delta *best_delta = NULL, *act_delta;
4727   struct cost_pair *cp;
4728
4729   iv_ca_add_use (data, ivs, use);
4730   best_cost = iv_ca_cost (ivs);
4731
4732   cp = iv_ca_cand_for_use (ivs, use);
4733   if (cp)
4734     {
4735       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4736       iv_ca_set_no_cp (data, ivs, use);
4737     }
4738
4739   /* First try important candidates not based on any memory object.  Only if
4740      this fails, try the specific ones.  Rationale -- in loops with many
4741      variables the best choice often is to use just one generic biv.  If we
4742      added here many ivs specific to the uses, the optimization algorithm later
4743      would be likely to get stuck in a local minimum, thus causing us to create
4744      too many ivs.  The approach from few ivs to more seems more likely to be
4745      successful -- starting from few ivs, replacing an expensive use by a
4746      specific iv should always be a win.  */
4747   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4748     {
4749       cand = iv_cand (data, i);
4750
4751       if (cand->iv->base_object != NULL_TREE)
4752         continue;
4753
4754       if (iv_ca_cand_used_p (ivs, cand))
4755         continue;
4756
4757       cp = get_use_iv_cost (data, use, cand);
4758       if (!cp)
4759         continue;
4760
4761       iv_ca_set_cp (data, ivs, use, cp);
4762       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4763       iv_ca_set_no_cp (data, ivs, use);
4764       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4765
4766       if (compare_costs (act_cost, best_cost) < 0)
4767         {
4768           best_cost = act_cost;
4769
4770           iv_ca_delta_free (&best_delta);
4771           best_delta = act_delta;
4772         }
4773       else
4774         iv_ca_delta_free (&act_delta);
4775     }
4776
4777   if (infinite_cost_p (best_cost))
4778     {
4779       for (i = 0; i < use->n_map_members; i++)
4780         {
4781           cp = use->cost_map + i;
4782           cand = cp->cand;
4783           if (!cand)
4784             continue;
4785
4786           /* Already tried this.  */
4787           if (cand->important && cand->iv->base_object == NULL_TREE)
4788             continue;
4789
4790           if (iv_ca_cand_used_p (ivs, cand))
4791             continue;
4792
4793           act_delta = NULL;
4794           iv_ca_set_cp (data, ivs, use, cp);
4795           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4796           iv_ca_set_no_cp (data, ivs, use);
4797           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4798                                        cp, act_delta);
4799
4800           if (compare_costs (act_cost, best_cost) < 0)
4801             {
4802               best_cost = act_cost;
4803
4804               if (best_delta)
4805                 iv_ca_delta_free (&best_delta);
4806               best_delta = act_delta;
4807             }
4808           else
4809             iv_ca_delta_free (&act_delta);
4810         }
4811     }
4812
4813   iv_ca_delta_commit (data, ivs, best_delta, true);
4814   iv_ca_delta_free (&best_delta);
4815
4816   return !infinite_cost_p (best_cost);
4817 }
4818
4819 /* Finds an initial assignment of candidates to uses.  */
4820
4821 static struct iv_ca *
4822 get_initial_solution (struct ivopts_data *data)
4823 {
4824   struct iv_ca *ivs = iv_ca_new (data);
4825   unsigned i;
4826
4827   for (i = 0; i < n_iv_uses (data); i++)
4828     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4829       {
4830         iv_ca_free (&ivs);
4831         return NULL;
4832       }
4833
4834   return ivs;
4835 }
4836
4837 /* Tries to improve set of induction variables IVS.  */
4838
4839 static bool
4840 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4841 {
4842   unsigned i, n_ivs;
4843   comp_cost acost, best_cost = iv_ca_cost (ivs);
4844   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4845   struct iv_cand *cand;
4846
4847   /* Try extending the set of induction variables by one.  */
4848   for (i = 0; i < n_iv_cands (data); i++)
4849     {
4850       cand = iv_cand (data, i);
4851
4852       if (iv_ca_cand_used_p (ivs, cand))
4853         continue;
4854
4855       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4856       if (!act_delta)
4857         continue;
4858
4859       /* If we successfully added the candidate and the set is small enough,
4860          try optimizing it by removing other candidates.  */
4861       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4862         {
4863           iv_ca_delta_commit (data, ivs, act_delta, true);
4864           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4865           iv_ca_delta_commit (data, ivs, act_delta, false);
4866           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4867         }
4868
4869       if (compare_costs (acost, best_cost) < 0)
4870         {
4871           best_cost = acost;
4872           iv_ca_delta_free (&best_delta);
4873           best_delta = act_delta;
4874         }
4875       else
4876         iv_ca_delta_free (&act_delta);
4877     }
4878
4879   if (!best_delta)
4880     {
4881       /* Try removing the candidates from the set instead.  */
4882       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4883
4884       /* Nothing more we can do.  */
4885       if (!best_delta)
4886         return false;
4887     }
4888
4889   iv_ca_delta_commit (data, ivs, best_delta, true);
4890   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4891   iv_ca_delta_free (&best_delta);
4892   return true;
4893 }
4894
4895 /* Attempts to find the optimal set of induction variables.  We do simple
4896    greedy heuristic -- we try to replace at most one candidate in the selected
4897    solution and remove the unused ivs while this improves the cost.  */
4898
4899 static struct iv_ca *
4900 find_optimal_iv_set (struct ivopts_data *data)
4901 {
4902   unsigned i;
4903   struct iv_ca *set;
4904   struct iv_use *use;
4905
4906   /* Get the initial solution.  */
4907   set = get_initial_solution (data);
4908   if (!set)
4909     {
4910       if (dump_file && (dump_flags & TDF_DETAILS))
4911         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4912       return NULL;
4913     }
4914
4915   if (dump_file && (dump_flags & TDF_DETAILS))
4916     {
4917       fprintf (dump_file, "Initial set of candidates:\n");
4918       iv_ca_dump (data, dump_file, set);
4919     }
4920
4921   while (try_improve_iv_set (data, set))
4922     {
4923       if (dump_file && (dump_flags & TDF_DETAILS))
4924         {
4925           fprintf (dump_file, "Improved to:\n");
4926           iv_ca_dump (data, dump_file, set);
4927         }
4928     }
4929
4930   if (dump_file && (dump_flags & TDF_DETAILS))
4931     {
4932       comp_cost cost = iv_ca_cost (set);
4933       fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4934     }
4935
4936   for (i = 0; i < n_iv_uses (data); i++)
4937     {
4938       use = iv_use (data, i);
4939       use->selected = iv_ca_cand_for_use (set, use)->cand;
4940     }
4941
4942   return set;
4943 }
4944
4945 /* Creates a new induction variable corresponding to CAND.  */
4946
4947 static void
4948 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4949 {
4950   gimple_stmt_iterator incr_pos;
4951   tree base;
4952   bool after = false;
4953
4954   if (!cand->iv)
4955     return;
4956
4957   switch (cand->pos)
4958     {
4959     case IP_NORMAL:
4960       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
4961       break;
4962
4963     case IP_END:
4964       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
4965       after = true;
4966       break;
4967
4968     case IP_ORIGINAL:
4969       /* Mark that the iv is preserved.  */
4970       name_info (data, cand->var_before)->preserve_biv = true;
4971       name_info (data, cand->var_after)->preserve_biv = true;
4972
4973       /* Rewrite the increment so that it uses var_before directly.  */
4974       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4975
4976       return;
4977     }
4978
4979   gimple_add_tmp_var (cand->var_before);
4980   add_referenced_var (cand->var_before);
4981
4982   base = unshare_expr (cand->iv->base);
4983
4984   create_iv (base, unshare_expr (cand->iv->step),
4985              cand->var_before, data->current_loop,
4986              &incr_pos, after, &cand->var_before, &cand->var_after);
4987 }
4988
4989 /* Creates new induction variables described in SET.  */
4990
4991 static void
4992 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4993 {
4994   unsigned i;
4995   struct iv_cand *cand;
4996   bitmap_iterator bi;
4997
4998   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4999     {
5000       cand = iv_cand (data, i);
5001       create_new_iv (data, cand);
5002     }
5003 }
5004
5005 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5006    is true, remove also the ssa name defined by the statement.  */
5007
5008 static void
5009 remove_statement (gimple stmt, bool including_defined_name)
5010 {
5011   gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
5012
5013   if (gimple_code (stmt) == GIMPLE_PHI)
5014     remove_phi_node (&bsi, including_defined_name);
5015   else
5016     {
5017       gsi_remove (&bsi, true);
5018       release_defs (stmt);
5019     }
5020 }
5021
5022 /* Rewrites USE (definition of iv used in a nonlinear expression)
5023    using candidate CAND.  */
5024
5025 static void
5026 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5027                             struct iv_use *use, struct iv_cand *cand)
5028 {
5029   tree comp;
5030   tree op, tgt;
5031   gimple ass;
5032   gimple_stmt_iterator bsi;
5033
5034   /* An important special case -- if we are asked to express value of
5035      the original iv by itself, just exit; there is no need to
5036      introduce a new computation (that might also need casting the
5037      variable to unsigned and back).  */
5038   if (cand->pos == IP_ORIGINAL
5039       && cand->incremented_at == use->stmt)
5040     {
5041       tree step, ctype, utype;
5042       enum tree_code incr_code = PLUS_EXPR, old_code;
5043
5044       gcc_assert (is_gimple_assign (use->stmt));
5045       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5046
5047       step = cand->iv->step;
5048       ctype = TREE_TYPE (step);
5049       utype = TREE_TYPE (cand->var_after);
5050       if (TREE_CODE (step) == NEGATE_EXPR)
5051         {
5052           incr_code = MINUS_EXPR;
5053           step = TREE_OPERAND (step, 0);
5054         }
5055
5056       /* Check whether we may leave the computation unchanged.
5057          This is the case only if it does not rely on other
5058          computations in the loop -- otherwise, the computation
5059          we rely upon may be removed in remove_unused_ivs,
5060          thus leading to ICE.  */
5061       old_code = gimple_assign_rhs_code (use->stmt);
5062       if (old_code == PLUS_EXPR
5063           || old_code == MINUS_EXPR
5064           || old_code == POINTER_PLUS_EXPR)
5065         {
5066           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5067             op = gimple_assign_rhs2 (use->stmt);
5068           else if (old_code != MINUS_EXPR
5069                    && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5070             op = gimple_assign_rhs1 (use->stmt);
5071           else
5072             op = NULL_TREE;
5073         }
5074       else
5075         op = NULL_TREE;
5076
5077       if (op
5078           && (TREE_CODE (op) == INTEGER_CST
5079               || operand_equal_p (op, step, 0)))
5080         return;
5081
5082       /* Otherwise, add the necessary computations to express
5083          the iv.  */
5084       op = fold_convert (ctype, cand->var_before);
5085       comp = fold_convert (utype,
5086                            build2 (incr_code, ctype, op,
5087                                    unshare_expr (step)));
5088     }
5089   else
5090     {
5091       comp = get_computation (data->current_loop, use, cand);
5092       gcc_assert (comp != NULL_TREE);
5093     }
5094
5095   switch (gimple_code (use->stmt))
5096     {
5097     case GIMPLE_PHI:
5098       tgt = PHI_RESULT (use->stmt);
5099
5100       /* If we should keep the biv, do not replace it.  */
5101       if (name_info (data, tgt)->preserve_biv)
5102         return;
5103
5104       bsi = gsi_after_labels (gimple_bb (use->stmt));
5105       break;
5106
5107     case GIMPLE_ASSIGN:
5108       tgt = gimple_assign_lhs (use->stmt);
5109       bsi = gsi_for_stmt (use->stmt);
5110       break;
5111
5112     default:
5113       gcc_unreachable ();
5114     }
5115
5116   op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5117                                  true, GSI_SAME_STMT);
5118
5119   if (gimple_code (use->stmt) == GIMPLE_PHI)
5120     {
5121       ass = gimple_build_assign (tgt, op);
5122       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5123       remove_statement (use->stmt, false);
5124     }
5125   else
5126     {
5127       gimple_assign_set_rhs_from_tree (&bsi, op);
5128       use->stmt = gsi_stmt (bsi);
5129     }
5130 }
5131
5132 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5133    for_each_index.  */
5134
5135 static bool
5136 idx_remove_ssa_names (tree base, tree *idx,
5137                       void *data ATTRIBUTE_UNUSED)
5138 {
5139   tree *op;
5140
5141   if (TREE_CODE (*idx) == SSA_NAME)
5142     *idx = SSA_NAME_VAR (*idx);
5143
5144   if (TREE_CODE (base) == ARRAY_REF)
5145     {
5146       op = &TREE_OPERAND (base, 2);
5147       if (*op
5148           && TREE_CODE (*op) == SSA_NAME)
5149         *op = SSA_NAME_VAR (*op);
5150       op = &TREE_OPERAND (base, 3);
5151       if (*op
5152           && TREE_CODE (*op) == SSA_NAME)
5153         *op = SSA_NAME_VAR (*op);
5154     }
5155
5156   return true;
5157 }
5158
5159 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5160
5161 static tree
5162 unshare_and_remove_ssa_names (tree ref)
5163 {
5164   ref = unshare_expr (ref);
5165   for_each_index (&ref, idx_remove_ssa_names, NULL);
5166
5167   return ref;
5168 }
5169
5170 /* Extract the alias analysis info for the memory reference REF.  There are
5171    several ways how this information may be stored and what precisely is
5172    its semantics depending on the type of the reference, but there always is
5173    somewhere hidden one _DECL node that is used to determine the set of
5174    virtual operands for the reference.  The code below deciphers this jungle
5175    and extracts this single useful piece of information.  */
5176
5177 static tree
5178 get_ref_tag (tree ref, tree orig)
5179 {
5180   tree var = get_base_address (ref);
5181   tree aref = NULL_TREE, tag, sv;
5182   HOST_WIDE_INT offset, size, maxsize;
5183
5184   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5185     {
5186       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5187       if (ref)
5188         break;
5189     }
5190
5191   if (!var)
5192     return NULL_TREE;
5193
5194   if (TREE_CODE (var) == INDIRECT_REF)
5195     {
5196       /* If the base is a dereference of a pointer, first check its name memory
5197          tag.  If it does not have one, use its symbol memory tag.  */
5198       var = TREE_OPERAND (var, 0);
5199       if (TREE_CODE (var) != SSA_NAME)
5200         return NULL_TREE;
5201
5202       if (SSA_NAME_PTR_INFO (var))
5203         {
5204           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5205           if (tag)
5206             return tag;
5207         }
5208
5209       var = SSA_NAME_VAR (var);
5210       tag = symbol_mem_tag (var);
5211       gcc_assert (tag != NULL_TREE);
5212       return tag;
5213     }
5214   else
5215     {
5216       if (!DECL_P (var))
5217         return NULL_TREE;
5218
5219       tag = symbol_mem_tag (var);
5220       if (tag)
5221         return tag;
5222
5223       return var;
5224     }
5225 }
5226
5227 /* Copies the reference information from OLD_REF to NEW_REF.  */
5228
5229 static void
5230 copy_ref_info (tree new_ref, tree old_ref)
5231 {
5232   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5233     copy_mem_ref_info (new_ref, old_ref);
5234   else
5235     {
5236       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5237       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5238     }
5239 }
5240
5241 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5242
5243 static void
5244 rewrite_use_address (struct ivopts_data *data,
5245                      struct iv_use *use, struct iv_cand *cand)
5246 {
5247   aff_tree aff;
5248   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5249   tree ref;
5250   bool ok;
5251
5252   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5253   gcc_assert (ok);
5254   unshare_aff_combination (&aff);
5255
5256   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5257   copy_ref_info (ref, *use->op_p);
5258   *use->op_p = ref;
5259 }
5260
5261 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5262    candidate CAND.  */
5263
5264 static void
5265 rewrite_use_compare (struct ivopts_data *data,
5266                      struct iv_use *use, struct iv_cand *cand)
5267 {
5268   tree comp, *var_p, op, bound;
5269   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5270   enum tree_code compare;
5271   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5272   bool ok;
5273
5274   bound = cp->value;
5275   if (bound)
5276     {
5277       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5278       tree var_type = TREE_TYPE (var);
5279
5280       compare = iv_elimination_compare (data, use);
5281       bound = unshare_expr (fold_convert (var_type, bound));
5282       op = force_gimple_operand_gsi (&bsi, bound, true, NULL_TREE,
5283                                      true, GSI_SAME_STMT);
5284
5285       gimple_cond_set_lhs (use->stmt, var);
5286       gimple_cond_set_code (use->stmt, compare);
5287       gimple_cond_set_rhs (use->stmt, op);
5288       return;
5289     }
5290
5291   /* The induction variable elimination failed; just express the original
5292      giv.  */
5293   comp = get_computation (data->current_loop, use, cand);
5294   gcc_assert (comp != NULL_TREE);
5295
5296   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5297   gcc_assert (ok);
5298
5299   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5300                                      true, GSI_SAME_STMT);
5301 }
5302
5303 /* Rewrites USE using candidate CAND.  */
5304
5305 static void
5306 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5307 {
5308   push_stmt_changes (&use->stmt);
5309
5310   switch (use->type)
5311     {
5312       case USE_NONLINEAR_EXPR:
5313         rewrite_use_nonlinear_expr (data, use, cand);
5314         break;
5315
5316       case USE_ADDRESS:
5317         rewrite_use_address (data, use, cand);
5318         break;
5319
5320       case USE_COMPARE:
5321         rewrite_use_compare (data, use, cand);
5322         break;
5323
5324       default:
5325         gcc_unreachable ();
5326     }
5327
5328   pop_stmt_changes (&use->stmt);
5329 }
5330
5331 /* Rewrite the uses using the selected induction variables.  */
5332
5333 static void
5334 rewrite_uses (struct ivopts_data *data)
5335 {
5336   unsigned i;
5337   struct iv_cand *cand;
5338   struct iv_use *use;
5339
5340   for (i = 0; i < n_iv_uses (data); i++)
5341     {
5342       use = iv_use (data, i);
5343       cand = use->selected;
5344       gcc_assert (cand);
5345
5346       rewrite_use (data, use, cand);
5347     }
5348 }
5349
5350 /* Removes the ivs that are not used after rewriting.  */
5351
5352 static void
5353 remove_unused_ivs (struct ivopts_data *data)
5354 {
5355   unsigned j;
5356   bitmap_iterator bi;
5357
5358   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5359     {
5360       struct version_info *info;
5361
5362       info = ver_info (data, j);
5363       if (info->iv
5364           && !integer_zerop (info->iv->step)
5365           && !info->inv_id
5366           && !info->iv->have_use_for
5367           && !info->preserve_biv)
5368         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5369     }
5370 }
5371
5372 /* Frees data allocated by the optimization of a single loop.  */
5373
5374 static void
5375 free_loop_data (struct ivopts_data *data)
5376 {
5377   unsigned i, j;
5378   bitmap_iterator bi;
5379   tree obj;
5380
5381   if (data->niters)
5382     {
5383       pointer_map_destroy (data->niters);
5384       data->niters = NULL;
5385     }
5386
5387   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5388     {
5389       struct version_info *info;
5390
5391       info = ver_info (data, i);
5392       if (info->iv)
5393         free (info->iv);
5394       info->iv = NULL;
5395       info->has_nonlin_use = false;
5396       info->preserve_biv = false;
5397       info->inv_id = 0;
5398     }
5399   bitmap_clear (data->relevant);
5400   bitmap_clear (data->important_candidates);
5401
5402   for (i = 0; i < n_iv_uses (data); i++)
5403     {
5404       struct iv_use *use = iv_use (data, i);
5405
5406       free (use->iv);
5407       BITMAP_FREE (use->related_cands);
5408       for (j = 0; j < use->n_map_members; j++)
5409         if (use->cost_map[j].depends_on)
5410           BITMAP_FREE (use->cost_map[j].depends_on);
5411       free (use->cost_map);
5412       free (use);
5413     }
5414   VEC_truncate (iv_use_p, data->iv_uses, 0);
5415
5416   for (i = 0; i < n_iv_cands (data); i++)
5417     {
5418       struct iv_cand *cand = iv_cand (data, i);
5419
5420       if (cand->iv)
5421         free (cand->iv);
5422       if (cand->depends_on)
5423         BITMAP_FREE (cand->depends_on);
5424       free (cand);
5425     }
5426   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5427
5428   if (data->version_info_size < num_ssa_names)
5429     {
5430       data->version_info_size = 2 * num_ssa_names;
5431       free (data->version_info);
5432       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5433     }
5434
5435   data->max_inv_id = 0;
5436
5437   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5438     SET_DECL_RTL (obj, NULL_RTX);
5439
5440   VEC_truncate (tree, decl_rtl_to_reset, 0);
5441 }
5442
5443 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5444    loop tree.  */
5445
5446 static void
5447 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5448 {
5449   free_loop_data (data);
5450   free (data->version_info);
5451   BITMAP_FREE (data->relevant);
5452   BITMAP_FREE (data->important_candidates);
5453
5454   VEC_free (tree, heap, decl_rtl_to_reset);
5455   VEC_free (iv_use_p, heap, data->iv_uses);
5456   VEC_free (iv_cand_p, heap, data->iv_candidates);
5457 }
5458
5459 /* Optimizes the LOOP.  Returns true if anything changed.  */
5460
5461 static bool
5462 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5463 {
5464   bool changed = false;
5465   struct iv_ca *iv_ca;
5466   edge exit;
5467
5468   gcc_assert (!data->niters);
5469   data->current_loop = loop;
5470
5471   if (dump_file && (dump_flags & TDF_DETAILS))
5472     {
5473       fprintf (dump_file, "Processing loop %d\n", loop->num);
5474
5475       exit = single_dom_exit (loop);
5476       if (exit)
5477         {
5478           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5479                    exit->src->index, exit->dest->index);
5480           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5481           fprintf (dump_file, "\n");
5482         }
5483
5484       fprintf (dump_file, "\n");
5485     }
5486
5487   /* For each ssa name determines whether it behaves as an induction variable
5488      in some loop.  */
5489   if (!find_induction_variables (data))
5490     goto finish;
5491
5492   /* Finds interesting uses (item 1).  */
5493   find_interesting_uses (data);
5494   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5495     goto finish;
5496
5497   /* Finds candidates for the induction variables (item 2).  */
5498   find_iv_candidates (data);
5499
5500   /* Calculates the costs (item 3, part 1).  */
5501   determine_use_iv_costs (data);
5502   determine_iv_costs (data);
5503   determine_set_costs (data);
5504
5505   /* Find the optimal set of induction variables (item 3, part 2).  */
5506   iv_ca = find_optimal_iv_set (data);
5507   if (!iv_ca)
5508     goto finish;
5509   changed = true;
5510
5511   /* Create the new induction variables (item 4, part 1).  */
5512   create_new_ivs (data, iv_ca);
5513   iv_ca_free (&iv_ca);
5514
5515   /* Rewrite the uses (item 4, part 2).  */
5516   rewrite_uses (data);
5517
5518   /* Remove the ivs that are unused after rewriting.  */
5519   remove_unused_ivs (data);
5520
5521   /* We have changed the structure of induction variables; it might happen
5522      that definitions in the scev database refer to some of them that were
5523      eliminated.  */
5524   scev_reset ();
5525
5526 finish:
5527   free_loop_data (data);
5528
5529   return changed;
5530 }
5531
5532 /* Main entry point.  Optimizes induction variables in loops.  */
5533
5534 void
5535 tree_ssa_iv_optimize (void)
5536 {
5537   struct loop *loop;
5538   struct ivopts_data data;
5539   loop_iterator li;
5540
5541   tree_ssa_iv_optimize_init (&data);
5542
5543   /* Optimize the loops starting with the innermost ones.  */
5544   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5545     {
5546       if (dump_file && (dump_flags & TDF_DETAILS))
5547         flow_loop_dump (loop, dump_file, NULL, 1);
5548
5549       tree_ssa_iv_optimize_loop (&data, loop);
5550     }
5551
5552   tree_ssa_iv_optimize_finalize (&data);
5553 }