gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "rtl.h"
  70 #include "tm_p.h"
  71 #include "hard-reg-set.h"
  72 #include "basic-block.h"
  73 #include "output.h"
  74 #include "diagnostic.h"
  75 #include "tree-flow.h"
  76 #include "tree-dump.h"
  77 #include "timevar.h"
  78 #include "cfgloop.h"
  79 #include "varray.h"
  80 #include "expr.h"
  81 #include "tree-pass.h"
  82 #include "ggc.h"
  83 #include "insn-config.h"
  84 #include "recog.h"
  85 #include "pointer-set.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92 #include "tree-affine.h"
  93 #include "target.h"
  94
  95 /* The infinite cost.  */
  96 #define INFTY 10000000
  97
  98 /* The expected number of loop iterations.  TODO -- use profiling instead of
  99    this.  */
 100 #define AVG_LOOP_NITER(LOOP) 5
 101
 102
 103 /* Representation of the induction variable.  */
 104 struct iv
 105 {
 106   tree base;            /* Initial value of the iv.  */
 107   tree base_object;     /* A memory object to that the induction variable points.  */
 108   tree step;            /* Step of the iv (constant only).  */
 109   tree ssa_name;        /* The ssa name with the value.  */
 110   bool biv_p;           /* Is it a biv?  */
 111   bool have_use_for;    /* Do we already have a use for it?  */
 112   unsigned use_id;      /* The identifier in the use if it is the case.  */
 113 };
 114
 115 /* Per-ssa version information (induction variable descriptions, etc.).  */
 116 struct version_info
 117 {
 118   tree name;            /* The ssa name.  */
 119   struct iv *iv;        /* Induction variable description.  */
 120   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 121                            an expression that is not an induction variable.  */
 122   unsigned inv_id;      /* Id of an invariant.  */
 123   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 124 };
 125
 126 /* Types of uses.  */
 127 enum use_type
 128 {
 129   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 130   USE_ADDRESS,          /* Use in an address.  */
 131   USE_COMPARE           /* Use is a compare.  */
 132 };
 133
 134 /* Cost of a computation.  */
 135 typedef struct
 136 {
 137   unsigned cost;        /* The runtime cost.  */
 138   unsigned complexity;  /* The estimate of the complexity of the code for
 139                            the computation (in no concrete units --
 140                            complexity field should be larger for more
 141                            complex expressions and addressing modes).  */
 142 } comp_cost;
 143
 144 static const comp_cost zero_cost = {0, 0};
 145 static const comp_cost infinite_cost = {INFTY, INFTY};
 146
 147 /* The candidate - cost pair.  */
 148 struct cost_pair
 149 {
 150   struct iv_cand *cand; /* The candidate.  */
 151   comp_cost cost;       /* The cost.  */
 152   bitmap depends_on;    /* The list of invariants that have to be
 153                            preserved.  */
 154   tree value;           /* For final value elimination, the expression for
 155                            the final value of the iv.  For iv elimination,
 156                            the new bound to compare with.  */
 157 };
 158
 159 /* Use.  */
 160 struct iv_use
 161 {
 162   unsigned id;          /* The id of the use.  */
 163   enum use_type type;   /* Type of the use.  */
 164   struct iv *iv;        /* The induction variable it is based on.  */
 165   tree stmt;            /* Statement in that it occurs.  */
 166   tree *op_p;           /* The place where it occurs.  */
 167   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 168                            important ones.  */
 169
 170   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 171   struct cost_pair *cost_map;
 172                         /* The costs wrto the iv candidates.  */
 173
 174   struct iv_cand *selected;
 175                         /* The selected candidate.  */
 176 };
 177
 178 /* The position where the iv is computed.  */
 179 enum iv_position
 180 {
 181   IP_NORMAL,            /* At the end, just before the exit condition.  */
 182   IP_END,               /* At the end of the latch block.  */
 183   IP_ORIGINAL           /* The original biv.  */
 184 };
 185
 186 /* The induction variable candidate.  */
 187 struct iv_cand
 188 {
 189   unsigned id;          /* The number of the candidate.  */
 190   bool important;       /* Whether this is an "important" candidate, i.e. such
 191                            that it should be considered by all uses.  */
 192   enum iv_position pos; /* Where it is computed.  */
 193   tree incremented_at;  /* For original biv, the statement where it is
 194                            incremented.  */
 195   tree var_before;      /* The variable used for it before increment.  */
 196   tree var_after;       /* The variable used for it after increment.  */
 197   struct iv *iv;        /* The value of the candidate.  NULL for
 198                            "pseudocandidate" used to indicate the possibility
 199                            to replace the final value of an iv by direct
 200                            computation of the value.  */
 201   unsigned cost;        /* Cost of the candidate.  */
 202   bitmap depends_on;    /* The list of invariants that are used in step of the
 203                            biv.  */
 204 };
 205
 206 /* The data used by the induction variable optimizations.  */
 207
 208 typedef struct iv_use *iv_use_p;
 209 DEF_VEC_P(iv_use_p);
 210 DEF_VEC_ALLOC_P(iv_use_p,heap);
 211
 212 typedef struct iv_cand *iv_cand_p;
 213 DEF_VEC_P(iv_cand_p);
 214 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 215
 216 struct ivopts_data
 217 {
 218   /* The currently optimized loop.  */
 219   struct loop *current_loop;
 220
 221   /* Number of registers used in it.  */
 222   unsigned regs_used;
 223
 224   /* Numbers of iterations for all exits of the current loop.  */
 225   struct pointer_map_t *niters;
 226
 227   /* The size of version_info array allocated.  */
 228   unsigned version_info_size;
 229
 230   /* The array of information for the ssa names.  */
 231   struct version_info *version_info;
 232
 233   /* The bitmap of indices in version_info whose value was changed.  */
 234   bitmap relevant;
 235
 236   /* The maximum invariant id.  */
 237   unsigned max_inv_id;
 238
 239   /* The uses of induction variables.  */
 240   VEC(iv_use_p,heap) *iv_uses;
 241
 242   /* The candidates.  */
 243   VEC(iv_cand_p,heap) *iv_candidates;
 244
 245   /* A bitmap of important candidates.  */
 246   bitmap important_candidates;
 247
 248   /* Whether to consider just related and important candidates when replacing a
 249      use.  */
 250   bool consider_all_candidates;
 251 };
 252
 253 /* An assignment of iv candidates to uses.  */
 254
 255 struct iv_ca
 256 {
 257   /* The number of uses covered by the assignment.  */
 258   unsigned upto;
 259
 260   /* Number of uses that cannot be expressed by the candidates in the set.  */
 261   unsigned bad_uses;
 262
 263   /* Candidate assigned to a use, together with the related costs.  */
 264   struct cost_pair **cand_for_use;
 265
 266   /* Number of times each candidate is used.  */
 267   unsigned *n_cand_uses;
 268
 269   /* The candidates used.  */
 270   bitmap cands;
 271
 272   /* The number of candidates in the set.  */
 273   unsigned n_cands;
 274
 275   /* Total number of registers needed.  */
 276   unsigned n_regs;
 277
 278   /* Total cost of expressing uses.  */
 279   comp_cost cand_use_cost;
 280
 281   /* Total cost of candidates.  */
 282   unsigned cand_cost;
 283
 284   /* Number of times each invariant is used.  */
 285   unsigned *n_invariant_uses;
 286
 287   /* Total cost of the assignment.  */
 288   comp_cost cost;
 289 };
 290
 291 /* Difference of two iv candidate assignments.  */
 292
 293 struct iv_ca_delta
 294 {
 295   /* Changed use.  */
 296   struct iv_use *use;
 297
 298   /* An old assignment (for rollback purposes).  */
 299   struct cost_pair *old_cp;
 300
 301   /* A new assignment.  */
 302   struct cost_pair *new_cp;
 303
 304   /* Next change in the list.  */
 305   struct iv_ca_delta *next_change;
 306 };
 307
 308 /* Bound on number of candidates below that all candidates are considered.  */
 309
 310 #define CONSIDER_ALL_CANDIDATES_BOUND \
 311   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 312
 313 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 314    optimizing such a loop would help, and it would take ages).  */
 315
 316 #define MAX_CONSIDERED_USES \
 317   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 318
 319 /* If there are at most this number of ivs in the set, try removing unnecessary
 320    ivs from the set always.  */
 321
 322 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 323   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 324
 325 /* The list of trees for that the decl_rtl field must be reset is stored
 326    here.  */
 327
 328 static VEC(tree,heap) *decl_rtl_to_reset;
 329
 330 /* Number of uses recorded in DATA.  */
 331
 332 static inline unsigned
 333 n_iv_uses (struct ivopts_data *data)
 334 {
 335   return VEC_length (iv_use_p, data->iv_uses);
 336 }
 337
 338 /* Ith use recorded in DATA.  */
 339
 340 static inline struct iv_use *
 341 iv_use (struct ivopts_data *data, unsigned i)
 342 {
 343   return VEC_index (iv_use_p, data->iv_uses, i);
 344 }
 345
 346 /* Number of candidates recorded in DATA.  */
 347
 348 static inline unsigned
 349 n_iv_cands (struct ivopts_data *data)
 350 {
 351   return VEC_length (iv_cand_p, data->iv_candidates);
 352 }
 353
 354 /* Ith candidate recorded in DATA.  */
 355
 356 static inline struct iv_cand *
 357 iv_cand (struct ivopts_data *data, unsigned i)
 358 {
 359   return VEC_index (iv_cand_p, data->iv_candidates, i);
 360 }
 361
 362 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 363
 364 edge
 365 single_dom_exit (struct loop *loop)
 366 {
 367   edge exit = single_exit (loop);
 368
 369   if (!exit)
 370     return NULL;
 371
 372   if (!just_once_each_iteration_p (loop, exit->src))
 373     return NULL;
 374
 375   return exit;
 376 }
 377
 378 /* Dumps information about the induction variable IV to FILE.  */
 379
 380 extern void dump_iv (FILE *, struct iv *);
 381 void
 382 dump_iv (FILE *file, struct iv *iv)
 383 {
 384   if (iv->ssa_name)
 385     {
 386       fprintf (file, "ssa name ");
 387       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 388       fprintf (file, "\n");
 389     }
 390
 391   fprintf (file, "  type ");
 392   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 393   fprintf (file, "\n");
 394
 395   if (iv->step)
 396     {
 397       fprintf (file, "  base ");
 398       print_generic_expr (file, iv->base, TDF_SLIM);
 399       fprintf (file, "\n");
 400
 401       fprintf (file, "  step ");
 402       print_generic_expr (file, iv->step, TDF_SLIM);
 403       fprintf (file, "\n");
 404     }
 405   else
 406     {
 407       fprintf (file, "  invariant ");
 408       print_generic_expr (file, iv->base, TDF_SLIM);
 409       fprintf (file, "\n");
 410     }
 411
 412   if (iv->base_object)
 413     {
 414       fprintf (file, "  base object ");
 415       print_generic_expr (file, iv->base_object, TDF_SLIM);
 416       fprintf (file, "\n");
 417     }
 418
 419   if (iv->biv_p)
 420     fprintf (file, "  is a biv\n");
 421 }
 422
 423 /* Dumps information about the USE to FILE.  */
 424
 425 extern void dump_use (FILE *, struct iv_use *);
 426 void
 427 dump_use (FILE *file, struct iv_use *use)
 428 {
 429   fprintf (file, "use %d\n", use->id);
 430
 431   switch (use->type)
 432     {
 433     case USE_NONLINEAR_EXPR:
 434       fprintf (file, "  generic\n");
 435       break;
 436
 437     case USE_ADDRESS:
 438       fprintf (file, "  address\n");
 439       break;
 440
 441     case USE_COMPARE:
 442       fprintf (file, "  compare\n");
 443       break;
 444
 445     default:
 446       gcc_unreachable ();
 447     }
 448
 449   fprintf (file, "  in statement ");
 450   print_generic_expr (file, use->stmt, TDF_SLIM);
 451   fprintf (file, "\n");
 452
 453   fprintf (file, "  at position ");
 454   if (use->op_p)
 455     print_generic_expr (file, *use->op_p, TDF_SLIM);
 456   fprintf (file, "\n");
 457
 458   dump_iv (file, use->iv);
 459
 460   if (use->related_cands)
 461     {
 462       fprintf (file, "  related candidates ");
 463       dump_bitmap (file, use->related_cands);
 464     }
 465 }
 466
 467 /* Dumps information about the uses to FILE.  */
 468
 469 extern void dump_uses (FILE *, struct ivopts_data *);
 470 void
 471 dump_uses (FILE *file, struct ivopts_data *data)
 472 {
 473   unsigned i;
 474   struct iv_use *use;
 475
 476   for (i = 0; i < n_iv_uses (data); i++)
 477     {
 478       use = iv_use (data, i);
 479
 480       dump_use (file, use);
 481       fprintf (file, "\n");
 482     }
 483 }
 484
 485 /* Dumps information about induction variable candidate CAND to FILE.  */
 486
 487 extern void dump_cand (FILE *, struct iv_cand *);
 488 void
 489 dump_cand (FILE *file, struct iv_cand *cand)
 490 {
 491   struct iv *iv = cand->iv;
 492
 493   fprintf (file, "candidate %d%s\n",
 494            cand->id, cand->important ? " (important)" : "");
 495
 496   if (cand->depends_on)
 497     {
 498       fprintf (file, "  depends on ");
 499       dump_bitmap (file, cand->depends_on);
 500     }
 501
 502   if (!iv)
 503     {
 504       fprintf (file, "  final value replacement\n");
 505       return;
 506     }
 507
 508   switch (cand->pos)
 509     {
 510     case IP_NORMAL:
 511       fprintf (file, "  incremented before exit test\n");
 512       break;
 513
 514     case IP_END:
 515       fprintf (file, "  incremented at end\n");
 516       break;
 517
 518     case IP_ORIGINAL:
 519       fprintf (file, "  original biv\n");
 520       break;
 521     }
 522
 523   dump_iv (file, iv);
 524 }
 525
 526 /* Returns the info for ssa version VER.  */
 527
 528 static inline struct version_info *
 529 ver_info (struct ivopts_data *data, unsigned ver)
 530 {
 531   return data->version_info + ver;
 532 }
 533
 534 /* Returns the info for ssa name NAME.  */
 535
 536 static inline struct version_info *
 537 name_info (struct ivopts_data *data, tree name)
 538 {
 539   return ver_info (data, SSA_NAME_VERSION (name));
 540 }
 541
 542 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 543    emitted in LOOP.  */
 544
 545 static bool
 546 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 547 {
 548   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 549
 550   gcc_assert (bb);
 551
 552   if (sbb == loop->latch)
 553     return true;
 554
 555   if (sbb != bb)
 556     return false;
 557
 558   return stmt == last_stmt (bb);
 559 }
 560
 561 /* Returns true if STMT if after the place where the original induction
 562    variable CAND is incremented.  */
 563
 564 static bool
 565 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 566 {
 567   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 568   basic_block stmt_bb = bb_for_stmt (stmt);
 569   block_stmt_iterator bsi;
 570
 571   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 572     return false;
 573
 574   if (stmt_bb != cand_bb)
 575     return true;
 576
 577   /* Scan the block from the end, since the original ivs are usually
 578      incremented at the end of the loop body.  */
 579   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 580     {
 581       if (bsi_stmt (bsi) == cand->incremented_at)
 582         return false;
 583       if (bsi_stmt (bsi) == stmt)
 584         return true;
 585     }
 586 }
 587
 588 /* Returns true if STMT if after the place where the induction variable
 589    CAND is incremented in LOOP.  */
 590
 591 static bool
 592 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 593 {
 594   switch (cand->pos)
 595     {
 596     case IP_END:
 597       return false;
 598
 599     case IP_NORMAL:
 600       return stmt_after_ip_normal_pos (loop, stmt);
 601
 602     case IP_ORIGINAL:
 603       return stmt_after_ip_original_pos (cand, stmt);
 604
 605     default:
 606       gcc_unreachable ();
 607     }
 608 }
 609
 610 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 611
 612 static bool
 613 abnormal_ssa_name_p (tree exp)
 614 {
 615   if (!exp)
 616     return false;
 617
 618   if (TREE_CODE (exp) != SSA_NAME)
 619     return false;
 620
 621   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 622 }
 623
 624 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 625    abnormal phi node.  Callback for for_each_index.  */
 626
 627 static bool
 628 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 629                                   void *data ATTRIBUTE_UNUSED)
 630 {
 631   if (TREE_CODE (base) == ARRAY_REF)
 632     {
 633       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 634         return false;
 635       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 636         return false;
 637     }
 638
 639   return !abnormal_ssa_name_p (*index);
 640 }
 641
 642 /* Returns true if EXPR contains a ssa name that occurs in an
 643    abnormal phi node.  */
 644
 645 bool
 646 contains_abnormal_ssa_name_p (tree expr)
 647 {
 648   enum tree_code code;
 649   enum tree_code_class codeclass;
 650
 651   if (!expr)
 652     return false;
 653
 654   code = TREE_CODE (expr);
 655   codeclass = TREE_CODE_CLASS (code);
 656
 657   if (code == SSA_NAME)
 658     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 659
 660   if (code == INTEGER_CST
 661       || is_gimple_min_invariant (expr))
 662     return false;
 663
 664   if (code == ADDR_EXPR)
 665     return !for_each_index (&TREE_OPERAND (expr, 0),
 666                             idx_contains_abnormal_ssa_name_p,
 667                             NULL);
 668
 669   switch (codeclass)
 670     {
 671     case tcc_binary:
 672     case tcc_comparison:
 673       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 674         return true;
 675
 676       /* Fallthru.  */
 677     case tcc_unary:
 678       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 679         return true;
 680
 681       break;
 682
 683     default:
 684       gcc_unreachable ();
 685     }
 686
 687   return false;
 688 }
 689
 690 /*  Returns tree describing number of iterations determined from
 691     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 692
 693 static tree
 694 niter_for_exit (struct ivopts_data *data, edge exit)
 695 {
 696   struct tree_niter_desc desc;
 697   tree niter;
 698   void **slot;
 699
 700   if (!data->niters)
 701     {
 702       data->niters = pointer_map_create ();
 703       slot = NULL;
 704     }
 705   else
 706     slot = pointer_map_contains (data->niters, exit);
 707
 708   if (!slot)
 709     {
 710       /* Try to determine number of iterations.  We must know it
 711          unconditionally (i.e., without possibility of # of iterations
 712          being zero).  Also, we cannot safely work with ssa names that
 713          appear in phi nodes on abnormal edges, so that we do not create
 714          overlapping life ranges for them (PR 27283).  */
 715       if (number_of_iterations_exit (data->current_loop,
 716                                      exit, &desc, true)
 717           && integer_zerop (desc.may_be_zero)
 718           && !contains_abnormal_ssa_name_p (desc.niter))
 719         niter = desc.niter;
 720       else
 721         niter = NULL_TREE;
 722
 723       *pointer_map_insert (data->niters, exit) = niter;
 724     }
 725   else
 726     niter = (tree) *slot;
 727
 728   return niter;
 729 }
 730
 731 /* Returns tree describing number of iterations determined from
 732    single dominating exit of DATA->current_loop, or NULL if something
 733    goes wrong.  */
 734
 735 static tree
 736 niter_for_single_dom_exit (struct ivopts_data *data)
 737 {
 738   edge exit = single_dom_exit (data->current_loop);
 739
 740   if (!exit)
 741     return NULL;
 742
 743   return niter_for_exit (data, exit);
 744 }
 745
 746 /* Initializes data structures used by the iv optimization pass, stored
 747    in DATA.  */
 748
 749 static void
 750 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 751 {
 752   data->version_info_size = 2 * num_ssa_names;
 753   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 754   data->relevant = BITMAP_ALLOC (NULL);
 755   data->important_candidates = BITMAP_ALLOC (NULL);
 756   data->max_inv_id = 0;
 757   data->niters = NULL;
 758   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 759   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 760   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 761 }
 762
 763 /* Returns a memory object to that EXPR points.  In case we are able to
 764    determine that it does not point to any such object, NULL is returned.  */
 765
 766 static tree
 767 determine_base_object (tree expr)
 768 {
 769   enum tree_code code = TREE_CODE (expr);
 770   tree base, obj;
 771
 772   /* If this is a pointer casted to any type, we need to determine
 773      the base object for the pointer; so handle conversions before
 774      throwing away non-pointer expressions.  */
 775   if (TREE_CODE (expr) == NOP_EXPR
 776       || TREE_CODE (expr) == CONVERT_EXPR)
 777     return determine_base_object (TREE_OPERAND (expr, 0));
 778
 779   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 780     return NULL_TREE;
 781
 782   switch (code)
 783     {
 784     case INTEGER_CST:
 785       return NULL_TREE;
 786
 787     case ADDR_EXPR:
 788       obj = TREE_OPERAND (expr, 0);
 789       base = get_base_address (obj);
 790
 791       if (!base)
 792         return expr;
 793
 794       if (TREE_CODE (base) == INDIRECT_REF)
 795         return determine_base_object (TREE_OPERAND (base, 0));
 796
 797       return fold_convert (ptr_type_node,
 798                            build_fold_addr_expr (base));
 799
 800     case POINTER_PLUS_EXPR:
 801       return determine_base_object (TREE_OPERAND (expr, 0));
 802
 803     case PLUS_EXPR:
 804     case MINUS_EXPR:
 805       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 806       gcc_unreachable ();
 807
 808     default:
 809       return fold_convert (ptr_type_node, expr);
 810     }
 811 }
 812
 813 /* Allocates an induction variable with given initial value BASE and step STEP
 814    for loop LOOP.  */
 815
 816 static struct iv *
 817 alloc_iv (tree base, tree step)
 818 {
 819   struct iv *iv = XCNEW (struct iv);
 820   gcc_assert (step != NULL_TREE);
 821
 822   iv->base = base;
 823   iv->base_object = determine_base_object (base);
 824   iv->step = step;
 825   iv->biv_p = false;
 826   iv->have_use_for = false;
 827   iv->use_id = 0;
 828   iv->ssa_name = NULL_TREE;
 829
 830   return iv;
 831 }
 832
 833 /* Sets STEP and BASE for induction variable IV.  */
 834
 835 static void
 836 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 837 {
 838   struct version_info *info = name_info (data, iv);
 839
 840   gcc_assert (!info->iv);
 841
 842   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 843   info->iv = alloc_iv (base, step);
 844   info->iv->ssa_name = iv;
 845 }
 846
 847 /* Finds induction variable declaration for VAR.  */
 848
 849 static struct iv *
 850 get_iv (struct ivopts_data *data, tree var)
 851 {
 852   basic_block bb;
 853   tree type = TREE_TYPE (var);
 854
 855   if (!POINTER_TYPE_P (type)
 856       && !INTEGRAL_TYPE_P (type))
 857     return NULL;
 858
 859   if (!name_info (data, var)->iv)
 860     {
 861       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 862
 863       if (!bb
 864           || !flow_bb_inside_loop_p (data->current_loop, bb))
 865         set_iv (data, var, var, build_int_cst (type, 0));
 866     }
 867
 868   return name_info (data, var)->iv;
 869 }
 870
 871 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 872    not define a simple affine biv with nonzero step.  */
 873
 874 static tree
 875 determine_biv_step (tree phi)
 876 {
 877   struct loop *loop = bb_for_stmt (phi)->loop_father;
 878   tree name = PHI_RESULT (phi);
 879   affine_iv iv;
 880
 881   if (!is_gimple_reg (name))
 882     return NULL_TREE;
 883
 884   if (!simple_iv (loop, phi, name, &iv, true))
 885     return NULL_TREE;
 886
 887   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 888 }
 889
 890 /* Finds basic ivs.  */
 891
 892 static bool
 893 find_bivs (struct ivopts_data *data)
 894 {
 895   tree phi, step, type, base;
 896   bool found = false;
 897   struct loop *loop = data->current_loop;
 898
 899   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 900     {
 901       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 902         continue;
 903
 904       step = determine_biv_step (phi);
 905       if (!step)
 906         continue;
 907
 908       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 909       base = expand_simple_operations (base);
 910       if (contains_abnormal_ssa_name_p (base)
 911           || contains_abnormal_ssa_name_p (step))
 912         continue;
 913
 914       type = TREE_TYPE (PHI_RESULT (phi));
 915       base = fold_convert (type, base);
 916       if (step)
 917         step = fold_convert (type, step);
 918
 919       set_iv (data, PHI_RESULT (phi), base, step);
 920       found = true;
 921     }
 922
 923   return found;
 924 }
 925
 926 /* Marks basic ivs.  */
 927
 928 static void
 929 mark_bivs (struct ivopts_data *data)
 930 {
 931   tree phi, var;
 932   struct iv *iv, *incr_iv;
 933   struct loop *loop = data->current_loop;
 934   basic_block incr_bb;
 935
 936   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 937     {
 938       iv = get_iv (data, PHI_RESULT (phi));
 939       if (!iv)
 940         continue;
 941
 942       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 943       incr_iv = get_iv (data, var);
 944       if (!incr_iv)
 945         continue;
 946
 947       /* If the increment is in the subloop, ignore it.  */
 948       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 949       if (incr_bb->loop_father != data->current_loop
 950           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 951         continue;
 952
 953       iv->biv_p = true;
 954       incr_iv->biv_p = true;
 955     }
 956 }
 957
 958 /* Checks whether STMT defines a linear induction variable and stores its
 959    parameters to IV.  */
 960
 961 static bool
 962 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 963 {
 964   tree lhs;
 965   struct loop *loop = data->current_loop;
 966
 967   iv->base = NULL_TREE;
 968   iv->step = NULL_TREE;
 969
 970   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 971     return false;
 972
 973   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 974   if (TREE_CODE (lhs) != SSA_NAME)
 975     return false;
 976
 977   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
 978     return false;
 979   iv->base = expand_simple_operations (iv->base);
 980
 981   if (contains_abnormal_ssa_name_p (iv->base)
 982       || contains_abnormal_ssa_name_p (iv->step))
 983     return false;
 984
 985   return true;
 986 }
 987
 988 /* Finds general ivs in statement STMT.  */
 989
 990 static void
 991 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
 992 {
 993   affine_iv iv;
 994
 995   if (!find_givs_in_stmt_scev (data, stmt, &iv))
 996     return;
 997
 998   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
 999 }
1000
1001 /* Finds general ivs in basic block BB.  */
1002
1003 static void
1004 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1005 {
1006   block_stmt_iterator bsi;
1007
1008   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1009     find_givs_in_stmt (data, bsi_stmt (bsi));
1010 }
1011
1012 /* Finds general ivs.  */
1013
1014 static void
1015 find_givs (struct ivopts_data *data)
1016 {
1017   struct loop *loop = data->current_loop;
1018   basic_block *body = get_loop_body_in_dom_order (loop);
1019   unsigned i;
1020
1021   for (i = 0; i < loop->num_nodes; i++)
1022     find_givs_in_bb (data, body[i]);
1023   free (body);
1024 }
1025
1026 /* For each ssa name defined in LOOP determines whether it is an induction
1027    variable and if so, its initial value and step.  */
1028
1029 static bool
1030 find_induction_variables (struct ivopts_data *data)
1031 {
1032   unsigned i;
1033   bitmap_iterator bi;
1034
1035   if (!find_bivs (data))
1036     return false;
1037
1038   find_givs (data);
1039   mark_bivs (data);
1040
1041   if (dump_file && (dump_flags & TDF_DETAILS))
1042     {
1043       tree niter = niter_for_single_dom_exit (data);
1044
1045       if (niter)
1046         {
1047           fprintf (dump_file, "  number of iterations ");
1048           print_generic_expr (dump_file, niter, TDF_SLIM);
1049           fprintf (dump_file, "\n\n");
1050         };
1051
1052       fprintf (dump_file, "Induction variables:\n\n");
1053
1054       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1055         {
1056           if (ver_info (data, i)->iv)
1057             dump_iv (dump_file, ver_info (data, i)->iv);
1058         }
1059     }
1060
1061   return true;
1062 }
1063
1064 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1065
1066 static struct iv_use *
1067 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1068             tree stmt, enum use_type use_type)
1069 {
1070   struct iv_use *use = XCNEW (struct iv_use);
1071
1072   use->id = n_iv_uses (data);
1073   use->type = use_type;
1074   use->iv = iv;
1075   use->stmt = stmt;
1076   use->op_p = use_p;
1077   use->related_cands = BITMAP_ALLOC (NULL);
1078
1079   /* To avoid showing ssa name in the dumps, if it was not reset by the
1080      caller.  */
1081   iv->ssa_name = NULL_TREE;
1082
1083   if (dump_file && (dump_flags & TDF_DETAILS))
1084     dump_use (dump_file, use);
1085
1086   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1087
1088   return use;
1089 }
1090
1091 /* Checks whether OP is a loop-level invariant and if so, records it.
1092    NONLINEAR_USE is true if the invariant is used in a way we do not
1093    handle specially.  */
1094
1095 static void
1096 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1097 {
1098   basic_block bb;
1099   struct version_info *info;
1100
1101   if (TREE_CODE (op) != SSA_NAME
1102       || !is_gimple_reg (op))
1103     return;
1104
1105   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1106   if (bb
1107       && flow_bb_inside_loop_p (data->current_loop, bb))
1108     return;
1109
1110   info = name_info (data, op);
1111   info->name = op;
1112   info->has_nonlin_use |= nonlinear_use;
1113   if (!info->inv_id)
1114     info->inv_id = ++data->max_inv_id;
1115   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1116 }
1117
1118 /* Checks whether the use OP is interesting and if so, records it.  */
1119
1120 static struct iv_use *
1121 find_interesting_uses_op (struct ivopts_data *data, tree op)
1122 {
1123   struct iv *iv;
1124   struct iv *civ;
1125   tree stmt;
1126   struct iv_use *use;
1127
1128   if (TREE_CODE (op) != SSA_NAME)
1129     return NULL;
1130
1131   iv = get_iv (data, op);
1132   if (!iv)
1133     return NULL;
1134
1135   if (iv->have_use_for)
1136     {
1137       use = iv_use (data, iv->use_id);
1138
1139       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1140       return use;
1141     }
1142
1143   if (integer_zerop (iv->step))
1144     {
1145       record_invariant (data, op, true);
1146       return NULL;
1147     }
1148   iv->have_use_for = true;
1149
1150   civ = XNEW (struct iv);
1151   *civ = *iv;
1152
1153   stmt = SSA_NAME_DEF_STMT (op);
1154   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1155               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1156
1157   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1158   iv->use_id = use->id;
1159
1160   return use;
1161 }
1162
1163 /* Given a condition *COND_P, checks whether it is a compare of an induction
1164    variable and an invariant.  If this is the case, CONTROL_VAR is set
1165    to location of the iv, BOUND to the location of the invariant,
1166    IV_VAR and IV_BOUND are set to the corresponding induction variable
1167    descriptions, and true is returned.  If this is not the case,
1168    CONTROL_VAR and BOUND are set to the arguments of the condition and
1169    false is returned.  */
1170
1171 static bool
1172 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1173                        tree **control_var, tree **bound,
1174                        struct iv **iv_var, struct iv **iv_bound)
1175 {
1176   /* The nodes returned when COND has just one operand.  Note that you should
1177      not modify anything in BOUND or IV_BOUND because of this.  */
1178   static struct iv const_iv;
1179   static tree zero;
1180   tree cond = *cond_p;
1181   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1182   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1183   bool ret = false;
1184
1185   zero = integer_zero_node;
1186   const_iv.step = integer_zero_node;
1187
1188   if (TREE_CODE (cond) == SSA_NAME)
1189     {
1190       op0 = cond_p;
1191       iv0 = get_iv (data, cond);
1192       ret = (iv0 && !integer_zerop (iv0->step));
1193       goto end;
1194     }
1195
1196   if (!COMPARISON_CLASS_P (cond))
1197     {
1198       op0 = cond_p;
1199       goto end;
1200     }
1201
1202   op0 = &TREE_OPERAND (cond, 0);
1203   op1 = &TREE_OPERAND (cond, 1);
1204   if (TREE_CODE (*op0) == SSA_NAME)
1205     iv0 = get_iv (data, *op0);
1206   if (TREE_CODE (*op1) == SSA_NAME)
1207     iv1 = get_iv (data, *op1);
1208
1209   /* Exactly one of the compared values must be an iv, and the other one must
1210      be an invariant.  */
1211   if (!iv0 || !iv1)
1212     goto end;
1213
1214   if (integer_zerop (iv0->step))
1215     {
1216       /* Control variable may be on the other side.  */
1217       tmp_op = op0; op0 = op1; op1 = tmp_op;
1218       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1219     }
1220   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1221
1222 end:
1223   if (control_var)
1224     *control_var = op0;;
1225   if (iv_var)
1226     *iv_var = iv0;;
1227   if (bound)
1228     *bound = op1;
1229   if (iv_bound)
1230     *iv_bound = iv1;
1231
1232   return ret;
1233 }
1234
1235 /* Checks whether the condition *COND_P in STMT is interesting
1236    and if so, records it.  */
1237
1238 static void
1239 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1240 {
1241   tree *var_p, *bound_p;
1242   struct iv *var_iv, *civ;
1243
1244   if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1245     {
1246       find_interesting_uses_op (data, *var_p);
1247       find_interesting_uses_op (data, *bound_p);
1248       return;
1249     }
1250
1251   civ = XNEW (struct iv);
1252   *civ = *var_iv;
1253   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1254 }
1255
1256 /* Returns true if expression EXPR is obviously invariant in LOOP,
1257    i.e. if all its operands are defined outside of the LOOP.  */
1258
1259 bool
1260 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1261 {
1262   basic_block def_bb;
1263   unsigned i, len;
1264
1265   if (is_gimple_min_invariant (expr))
1266     return true;
1267
1268   if (TREE_CODE (expr) == SSA_NAME)
1269     {
1270       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1271       if (def_bb
1272           && flow_bb_inside_loop_p (loop, def_bb))
1273         return false;
1274
1275       return true;
1276     }
1277
1278   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1279     return false;
1280
1281   len = TREE_OPERAND_LENGTH (expr);
1282   for (i = 0; i < len; i++)
1283     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1284       return false;
1285
1286   return true;
1287 }
1288
1289 /* Cumulates the steps of indices into DATA and replaces their values with the
1290    initial ones.  Returns false when the value of the index cannot be determined.
1291    Callback for for_each_index.  */
1292
1293 struct ifs_ivopts_data
1294 {
1295   struct ivopts_data *ivopts_data;
1296   tree stmt;
1297   tree step;
1298 };
1299
1300 static bool
1301 idx_find_step (tree base, tree *idx, void *data)
1302 {
1303   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1304   struct iv *iv;
1305   tree step, iv_base, iv_step, lbound, off;
1306   struct loop *loop = dta->ivopts_data->current_loop;
1307
1308   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1309       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1310     return false;
1311
1312   /* If base is a component ref, require that the offset of the reference
1313      be invariant.  */
1314   if (TREE_CODE (base) == COMPONENT_REF)
1315     {
1316       off = component_ref_field_offset (base);
1317       return expr_invariant_in_loop_p (loop, off);
1318     }
1319
1320   /* If base is array, first check whether we will be able to move the
1321      reference out of the loop (in order to take its address in strength
1322      reduction).  In order for this to work we need both lower bound
1323      and step to be loop invariants.  */
1324   if (TREE_CODE (base) == ARRAY_REF)
1325     {
1326       step = array_ref_element_size (base);
1327       lbound = array_ref_low_bound (base);
1328
1329       if (!expr_invariant_in_loop_p (loop, step)
1330           || !expr_invariant_in_loop_p (loop, lbound))
1331         return false;
1332     }
1333
1334   if (TREE_CODE (*idx) != SSA_NAME)
1335     return true;
1336
1337   iv = get_iv (dta->ivopts_data, *idx);
1338   if (!iv)
1339     return false;
1340
1341   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1342           *&x[0], which is not folded and does not trigger the
1343           ARRAY_REF path below.  */
1344   *idx = iv->base;
1345
1346   if (integer_zerop (iv->step))
1347     return true;
1348
1349   if (TREE_CODE (base) == ARRAY_REF)
1350     {
1351       step = array_ref_element_size (base);
1352
1353       /* We only handle addresses whose step is an integer constant.  */
1354       if (TREE_CODE (step) != INTEGER_CST)
1355         return false;
1356     }
1357   else
1358     /* The step for pointer arithmetics already is 1 byte.  */
1359     step = build_int_cst (sizetype, 1);
1360
1361   iv_base = iv->base;
1362   iv_step = iv->step;
1363   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1364                             sizetype, &iv_base, &iv_step, dta->stmt,
1365                             false))
1366     {
1367       /* The index might wrap.  */
1368       return false;
1369     }
1370
1371   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1372   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1373
1374   return true;
1375 }
1376
1377 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1378    object is passed to it in DATA.  */
1379
1380 static bool
1381 idx_record_use (tree base, tree *idx,
1382                 void *vdata)
1383 {
1384   struct ivopts_data *data = (struct ivopts_data *) vdata;
1385   find_interesting_uses_op (data, *idx);
1386   if (TREE_CODE (base) == ARRAY_REF)
1387     {
1388       find_interesting_uses_op (data, array_ref_element_size (base));
1389       find_interesting_uses_op (data, array_ref_low_bound (base));
1390     }
1391   return true;
1392 }
1393
1394 /* Returns true if memory reference REF may be unaligned.  */
1395
1396 static bool
1397 may_be_unaligned_p (tree ref)
1398 {
1399   tree base;
1400   tree base_type;
1401   HOST_WIDE_INT bitsize;
1402   HOST_WIDE_INT bitpos;
1403   tree toffset;
1404   enum machine_mode mode;
1405   int unsignedp, volatilep;
1406   unsigned base_align;
1407
1408   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1409      thus they are not misaligned.  */
1410   if (TREE_CODE (ref) == TARGET_MEM_REF)
1411     return false;
1412
1413   /* The test below is basically copy of what expr.c:normal_inner_ref
1414      does to check whether the object must be loaded by parts when
1415      STRICT_ALIGNMENT is true.  */
1416   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1417                               &unsignedp, &volatilep, true);
1418   base_type = TREE_TYPE (base);
1419   base_align = TYPE_ALIGN (base_type);
1420
1421   if (mode != BLKmode
1422       && (base_align < GET_MODE_ALIGNMENT (mode)
1423           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1424           || bitpos % BITS_PER_UNIT != 0))
1425     return true;
1426
1427   return false;
1428 }
1429
1430 /* Return true if EXPR may be non-addressable.   */
1431
1432 static bool
1433 may_be_nonaddressable_p (tree expr)
1434 {
1435   switch (TREE_CODE (expr))
1436     {
1437     case COMPONENT_REF:
1438       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1439              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1440
1441     case ARRAY_REF:
1442     case ARRAY_RANGE_REF:
1443       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1444
1445     case VIEW_CONVERT_EXPR:
1446       /* This kind of view-conversions may wrap non-addressable objects
1447          and make them look addressable.  After some processing the
1448          non-addressability may be uncovered again, causing ADDR_EXPRs
1449          of inappropriate objects to be built.  */
1450       return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1451              && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1452
1453     default:
1454       break;
1455     }
1456
1457   return false;
1458 }
1459
1460 /* Finds addresses in *OP_P inside STMT.  */
1461
1462 static void
1463 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1464 {
1465   tree base = *op_p, step = build_int_cst (sizetype, 0);
1466   struct iv *civ;
1467   struct ifs_ivopts_data ifs_ivopts_data;
1468
1469   /* Do not play with volatile memory references.  A bit too conservative,
1470      perhaps, but safe.  */
1471   if (stmt_ann (stmt)->has_volatile_ops)
1472     goto fail;
1473
1474   /* Ignore bitfields for now.  Not really something terribly complicated
1475      to handle.  TODO.  */
1476   if (TREE_CODE (base) == BIT_FIELD_REF)
1477     goto fail;
1478
1479   if (may_be_nonaddressable_p (base))
1480     goto fail;
1481
1482   if (STRICT_ALIGNMENT
1483       && may_be_unaligned_p (base))
1484     goto fail;
1485
1486   base = unshare_expr (base);
1487
1488   if (TREE_CODE (base) == TARGET_MEM_REF)
1489     {
1490       tree type = build_pointer_type (TREE_TYPE (base));
1491       tree astep;
1492
1493       if (TMR_BASE (base)
1494           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1495         {
1496           civ = get_iv (data, TMR_BASE (base));
1497           if (!civ)
1498             goto fail;
1499
1500           TMR_BASE (base) = civ->base;
1501           step = civ->step;
1502         }
1503       if (TMR_INDEX (base)
1504           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1505         {
1506           civ = get_iv (data, TMR_INDEX (base));
1507           if (!civ)
1508             goto fail;
1509
1510           TMR_INDEX (base) = civ->base;
1511           astep = civ->step;
1512
1513           if (astep)
1514             {
1515               if (TMR_STEP (base))
1516                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1517
1518               step = fold_build2 (PLUS_EXPR, type, step, astep);
1519             }
1520         }
1521
1522       if (integer_zerop (step))
1523         goto fail;
1524       base = tree_mem_ref_addr (type, base);
1525     }
1526   else
1527     {
1528       ifs_ivopts_data.ivopts_data = data;
1529       ifs_ivopts_data.stmt = stmt;
1530       ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1531       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1532           || integer_zerop (ifs_ivopts_data.step))
1533         goto fail;
1534       step = ifs_ivopts_data.step;
1535
1536       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1537       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1538
1539       base = build_fold_addr_expr (base);
1540
1541       /* Substituting bases of IVs into the base expression might
1542          have caused folding opportunities.  */
1543       if (TREE_CODE (base) == ADDR_EXPR)
1544         {
1545           tree *ref = &TREE_OPERAND (base, 0);
1546           while (handled_component_p (*ref))
1547             ref = &TREE_OPERAND (*ref, 0);
1548           if (TREE_CODE (*ref) == INDIRECT_REF)
1549             *ref = fold_indirect_ref (*ref);
1550         }
1551     }
1552
1553   civ = alloc_iv (base, step);
1554   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1555   return;
1556
1557 fail:
1558   for_each_index (op_p, idx_record_use, data);
1559 }
1560
1561 /* Finds and records invariants used in STMT.  */
1562
1563 static void
1564 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1565 {
1566   ssa_op_iter iter;
1567   use_operand_p use_p;
1568   tree op;
1569
1570   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1571     {
1572       op = USE_FROM_PTR (use_p);
1573       record_invariant (data, op, false);
1574     }
1575 }
1576
1577 /* Finds interesting uses of induction variables in the statement STMT.  */
1578
1579 static void
1580 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1581 {
1582   struct iv *iv;
1583   tree op, lhs, rhs;
1584   ssa_op_iter iter;
1585   use_operand_p use_p;
1586
1587   find_invariants_stmt (data, stmt);
1588
1589   if (TREE_CODE (stmt) == COND_EXPR)
1590     {
1591       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1592       return;
1593     }
1594
1595   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1596     {
1597       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1598       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1599
1600       if (TREE_CODE (lhs) == SSA_NAME)
1601         {
1602           /* If the statement defines an induction variable, the uses are not
1603              interesting by themselves.  */
1604
1605           iv = get_iv (data, lhs);
1606
1607           if (iv && !integer_zerop (iv->step))
1608             return;
1609         }
1610
1611       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1612         {
1613         case tcc_comparison:
1614           find_interesting_uses_cond (data, stmt,
1615                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1616           return;
1617
1618         case tcc_reference:
1619           find_interesting_uses_address (data, stmt,
1620                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1621           if (REFERENCE_CLASS_P (lhs))
1622             find_interesting_uses_address (data, stmt,
1623                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1624           return;
1625
1626         default: ;
1627         }
1628
1629       if (REFERENCE_CLASS_P (lhs)
1630           && is_gimple_val (rhs))
1631         {
1632           find_interesting_uses_address (data, stmt,
1633                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1634           find_interesting_uses_op (data, rhs);
1635           return;
1636         }
1637
1638       /* TODO -- we should also handle address uses of type
1639
1640          memory = call (whatever);
1641
1642          and
1643
1644          call (memory).  */
1645     }
1646
1647   if (TREE_CODE (stmt) == PHI_NODE
1648       && bb_for_stmt (stmt) == data->current_loop->header)
1649     {
1650       lhs = PHI_RESULT (stmt);
1651       iv = get_iv (data, lhs);
1652
1653       if (iv && !integer_zerop (iv->step))
1654         return;
1655     }
1656
1657   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1658     {
1659       op = USE_FROM_PTR (use_p);
1660
1661       if (TREE_CODE (op) != SSA_NAME)
1662         continue;
1663
1664       iv = get_iv (data, op);
1665       if (!iv)
1666         continue;
1667
1668       find_interesting_uses_op (data, op);
1669     }
1670 }
1671
1672 /* Finds interesting uses of induction variables outside of loops
1673    on loop exit edge EXIT.  */
1674
1675 static void
1676 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1677 {
1678   tree phi, def;
1679
1680   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1681     {
1682       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1683       if (is_gimple_reg (def))
1684         find_interesting_uses_op (data, def);
1685     }
1686 }
1687
1688 /* Finds uses of the induction variables that are interesting.  */
1689
1690 static void
1691 find_interesting_uses (struct ivopts_data *data)
1692 {
1693   basic_block bb;
1694   block_stmt_iterator bsi;
1695   tree phi;
1696   basic_block *body = get_loop_body (data->current_loop);
1697   unsigned i;
1698   struct version_info *info;
1699   edge e;
1700
1701   if (dump_file && (dump_flags & TDF_DETAILS))
1702     fprintf (dump_file, "Uses:\n\n");
1703
1704   for (i = 0; i < data->current_loop->num_nodes; i++)
1705     {
1706       edge_iterator ei;
1707       bb = body[i];
1708
1709       FOR_EACH_EDGE (e, ei, bb->succs)
1710         if (e->dest != EXIT_BLOCK_PTR
1711             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1712           find_interesting_uses_outside (data, e);
1713
1714       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1715         find_interesting_uses_stmt (data, phi);
1716       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1717         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1718     }
1719
1720   if (dump_file && (dump_flags & TDF_DETAILS))
1721     {
1722       bitmap_iterator bi;
1723
1724       fprintf (dump_file, "\n");
1725
1726       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1727         {
1728           info = ver_info (data, i);
1729           if (info->inv_id)
1730             {
1731               fprintf (dump_file, "  ");
1732               print_generic_expr (dump_file, info->name, TDF_SLIM);
1733               fprintf (dump_file, " is invariant (%d)%s\n",
1734                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1735             }
1736         }
1737
1738       fprintf (dump_file, "\n");
1739     }
1740
1741   free (body);
1742 }
1743
1744 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1745    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1746    we are at the top-level of the processed address.  */
1747
1748 static tree
1749 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1750                 unsigned HOST_WIDE_INT *offset)
1751 {
1752   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1753   enum tree_code code;
1754   tree type, orig_type = TREE_TYPE (expr);
1755   unsigned HOST_WIDE_INT off0, off1, st;
1756   tree orig_expr = expr;
1757
1758   STRIP_NOPS (expr);
1759
1760   type = TREE_TYPE (expr);
1761   code = TREE_CODE (expr);
1762   *offset = 0;
1763
1764   switch (code)
1765     {
1766     case INTEGER_CST:
1767       if (!cst_and_fits_in_hwi (expr)
1768           || integer_zerop (expr))
1769         return orig_expr;
1770
1771       *offset = int_cst_value (expr);
1772       return build_int_cst (orig_type, 0);
1773
1774     case POINTER_PLUS_EXPR:
1775     case PLUS_EXPR:
1776     case MINUS_EXPR:
1777       op0 = TREE_OPERAND (expr, 0);
1778       op1 = TREE_OPERAND (expr, 1);
1779
1780       op0 = strip_offset_1 (op0, false, false, &off0);
1781       op1 = strip_offset_1 (op1, false, false, &off1);
1782
1783       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1784       if (op0 == TREE_OPERAND (expr, 0)
1785           && op1 == TREE_OPERAND (expr, 1))
1786         return orig_expr;
1787
1788       if (integer_zerop (op1))
1789         expr = op0;
1790       else if (integer_zerop (op0))
1791         {
1792           if (code == MINUS_EXPR)
1793             expr = fold_build1 (NEGATE_EXPR, type, op1);
1794           else
1795             expr = op1;
1796         }
1797       else
1798         expr = fold_build2 (code, type, op0, op1);
1799
1800       return fold_convert (orig_type, expr);
1801
1802     case ARRAY_REF:
1803       if (!inside_addr)
1804         return orig_expr;
1805
1806       step = array_ref_element_size (expr);
1807       if (!cst_and_fits_in_hwi (step))
1808         break;
1809
1810       st = int_cst_value (step);
1811       op1 = TREE_OPERAND (expr, 1);
1812       op1 = strip_offset_1 (op1, false, false, &off1);
1813       *offset = off1 * st;
1814
1815       if (top_compref
1816           && integer_zerop (op1))
1817         {
1818           /* Strip the component reference completely.  */
1819           op0 = TREE_OPERAND (expr, 0);
1820           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1821           *offset += off0;
1822           return op0;
1823         }
1824       break;
1825
1826     case COMPONENT_REF:
1827       if (!inside_addr)
1828         return orig_expr;
1829
1830       tmp = component_ref_field_offset (expr);
1831       if (top_compref
1832           && cst_and_fits_in_hwi (tmp))
1833         {
1834           /* Strip the component reference completely.  */
1835           op0 = TREE_OPERAND (expr, 0);
1836           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1837           *offset = off0 + int_cst_value (tmp);
1838           return op0;
1839         }
1840       break;
1841
1842     case ADDR_EXPR:
1843       op0 = TREE_OPERAND (expr, 0);
1844       op0 = strip_offset_1 (op0, true, true, &off0);
1845       *offset += off0;
1846
1847       if (op0 == TREE_OPERAND (expr, 0))
1848         return orig_expr;
1849
1850       expr = build_fold_addr_expr (op0);
1851       return fold_convert (orig_type, expr);
1852
1853     case INDIRECT_REF:
1854       inside_addr = false;
1855       break;
1856
1857     default:
1858       return orig_expr;
1859     }
1860
1861   /* Default handling of expressions for that we want to recurse into
1862      the first operand.  */
1863   op0 = TREE_OPERAND (expr, 0);
1864   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1865   *offset += off0;
1866
1867   if (op0 == TREE_OPERAND (expr, 0)
1868       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1869     return orig_expr;
1870
1871   expr = copy_node (expr);
1872   TREE_OPERAND (expr, 0) = op0;
1873   if (op1)
1874     TREE_OPERAND (expr, 1) = op1;
1875
1876   /* Inside address, we might strip the top level component references,
1877      thus changing type of the expression.  Handling of ADDR_EXPR
1878      will fix that.  */
1879   expr = fold_convert (orig_type, expr);
1880
1881   return expr;
1882 }
1883
1884 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1885
1886 static tree
1887 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1888 {
1889   return strip_offset_1 (expr, false, false, offset);
1890 }
1891
1892 /* Returns variant of TYPE that can be used as base for different uses.
1893    We return unsigned type with the same precision, which avoids problems
1894    with overflows.  */
1895
1896 static tree
1897 generic_type_for (tree type)
1898 {
1899   if (POINTER_TYPE_P (type))
1900     return unsigned_type_for (type);
1901
1902   if (TYPE_UNSIGNED (type))
1903     return type;
1904
1905   return unsigned_type_for (type);
1906 }
1907
1908 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1909    the bitmap to that we should store it.  */
1910
1911 static struct ivopts_data *fd_ivopts_data;
1912 static tree
1913 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1914 {
1915   bitmap *depends_on = (bitmap *) data;
1916   struct version_info *info;
1917
1918   if (TREE_CODE (*expr_p) != SSA_NAME)
1919     return NULL_TREE;
1920   info = name_info (fd_ivopts_data, *expr_p);
1921
1922   if (!info->inv_id || info->has_nonlin_use)
1923     return NULL_TREE;
1924
1925   if (!*depends_on)
1926     *depends_on = BITMAP_ALLOC (NULL);
1927   bitmap_set_bit (*depends_on, info->inv_id);
1928
1929   return NULL_TREE;
1930 }
1931
1932 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1933    position to POS.  If USE is not NULL, the candidate is set as related to
1934    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1935    replacement of the final value of the iv by a direct computation.  */
1936
1937 static struct iv_cand *
1938 add_candidate_1 (struct ivopts_data *data,
1939                  tree base, tree step, bool important, enum iv_position pos,
1940                  struct iv_use *use, tree incremented_at)
1941 {
1942   unsigned i;
1943   struct iv_cand *cand = NULL;
1944   tree type, orig_type;
1945
1946   if (base)
1947     {
1948       orig_type = TREE_TYPE (base);
1949       type = generic_type_for (orig_type);
1950       if (type != orig_type)
1951         {
1952           base = fold_convert (type, base);
1953           step = fold_convert (type, step);
1954         }
1955     }
1956
1957   for (i = 0; i < n_iv_cands (data); i++)
1958     {
1959       cand = iv_cand (data, i);
1960
1961       if (cand->pos != pos)
1962         continue;
1963
1964       if (cand->incremented_at != incremented_at)
1965         continue;
1966
1967       if (!cand->iv)
1968         {
1969           if (!base && !step)
1970             break;
1971
1972           continue;
1973         }
1974
1975       if (!base && !step)
1976         continue;
1977
1978       if (operand_equal_p (base, cand->iv->base, 0)
1979           && operand_equal_p (step, cand->iv->step, 0))
1980         break;
1981     }
1982
1983   if (i == n_iv_cands (data))
1984     {
1985       cand = XCNEW (struct iv_cand);
1986       cand->id = i;
1987
1988       if (!base && !step)
1989         cand->iv = NULL;
1990       else
1991         cand->iv = alloc_iv (base, step);
1992
1993       cand->pos = pos;
1994       if (pos != IP_ORIGINAL && cand->iv)
1995         {
1996           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1997           cand->var_after = cand->var_before;
1998         }
1999       cand->important = important;
2000       cand->incremented_at = incremented_at;
2001       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2002
2003       if (step
2004           && TREE_CODE (step) != INTEGER_CST)
2005         {
2006           fd_ivopts_data = data;
2007           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2008         }
2009
2010       if (dump_file && (dump_flags & TDF_DETAILS))
2011         dump_cand (dump_file, cand);
2012     }
2013
2014   if (important && !cand->important)
2015     {
2016       cand->important = true;
2017       if (dump_file && (dump_flags & TDF_DETAILS))
2018         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2019     }
2020
2021   if (use)
2022     {
2023       bitmap_set_bit (use->related_cands, i);
2024       if (dump_file && (dump_flags & TDF_DETAILS))
2025         fprintf (dump_file, "Candidate %d is related to use %d\n",
2026                  cand->id, use->id);
2027     }
2028
2029   return cand;
2030 }
2031
2032 /* Returns true if incrementing the induction variable at the end of the LOOP
2033    is allowed.
2034
2035    The purpose is to avoid splitting latch edge with a biv increment, thus
2036    creating a jump, possibly confusing other optimization passes and leaving
2037    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2038    is not available (so we do not have a better alternative), or if the latch
2039    edge is already nonempty.  */
2040
2041 static bool
2042 allow_ip_end_pos_p (struct loop *loop)
2043 {
2044   if (!ip_normal_pos (loop))
2045     return true;
2046
2047   if (!empty_block_p (ip_end_pos (loop)))
2048     return true;
2049
2050   return false;
2051 }
2052
2053 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2054    position to POS.  If USE is not NULL, the candidate is set as related to
2055    it.  The candidate computation is scheduled on all available positions.  */
2056
2057 static void
2058 add_candidate (struct ivopts_data *data,
2059                tree base, tree step, bool important, struct iv_use *use)
2060 {
2061   if (ip_normal_pos (data->current_loop))
2062     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2063   if (ip_end_pos (data->current_loop)
2064       && allow_ip_end_pos_p (data->current_loop))
2065     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2066 }
2067
2068 /* Add a standard "0 + 1 * iteration" iv candidate for a
2069    type with SIZE bits.  */
2070
2071 static void
2072 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2073                                      unsigned int size)
2074 {
2075   tree type = lang_hooks.types.type_for_size (size, true);
2076   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2077                  true, NULL);
2078 }
2079
2080 /* Adds standard iv candidates.  */
2081
2082 static void
2083 add_standard_iv_candidates (struct ivopts_data *data)
2084 {
2085   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2086
2087   /* The same for a double-integer type if it is still fast enough.  */
2088   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2089     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2090 }
2091
2092
2093 /* Adds candidates bases on the old induction variable IV.  */
2094
2095 static void
2096 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2097 {
2098   tree phi, def;
2099   struct iv_cand *cand;
2100
2101   add_candidate (data, iv->base, iv->step, true, NULL);
2102
2103   /* The same, but with initial value zero.  */
2104   add_candidate (data,
2105                  build_int_cst (TREE_TYPE (iv->base), 0),
2106                  iv->step, true, NULL);
2107
2108   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2109   if (TREE_CODE (phi) == PHI_NODE)
2110     {
2111       /* Additionally record the possibility of leaving the original iv
2112          untouched.  */
2113       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2114       cand = add_candidate_1 (data,
2115                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2116                               SSA_NAME_DEF_STMT (def));
2117       cand->var_before = iv->ssa_name;
2118       cand->var_after = def;
2119     }
2120 }
2121
2122 /* Adds candidates based on the old induction variables.  */
2123
2124 static void
2125 add_old_ivs_candidates (struct ivopts_data *data)
2126 {
2127   unsigned i;
2128   struct iv *iv;
2129   bitmap_iterator bi;
2130
2131   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2132     {
2133       iv = ver_info (data, i)->iv;
2134       if (iv && iv->biv_p && !integer_zerop (iv->step))
2135         add_old_iv_candidates (data, iv);
2136     }
2137 }
2138
2139 /* Adds candidates based on the value of the induction variable IV and USE.  */
2140
2141 static void
2142 add_iv_value_candidates (struct ivopts_data *data,
2143                          struct iv *iv, struct iv_use *use)
2144 {
2145   unsigned HOST_WIDE_INT offset;
2146   tree base;
2147
2148   add_candidate (data, iv->base, iv->step, false, use);
2149
2150   /* The same, but with initial value zero.  Make such variable important,
2151      since it is generic enough so that possibly many uses may be based
2152      on it.  */
2153   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2154                  iv->step, true, use);
2155
2156   /* Third, try removing the constant offset.  */
2157   base = strip_offset (iv->base, &offset);
2158   if (offset)
2159     add_candidate (data, base, iv->step, false, use);
2160 }
2161
2162 /* Adds candidates based on the uses.  */
2163
2164 static void
2165 add_derived_ivs_candidates (struct ivopts_data *data)
2166 {
2167   unsigned i;
2168
2169   for (i = 0; i < n_iv_uses (data); i++)
2170     {
2171       struct iv_use *use = iv_use (data, i);
2172
2173       if (!use)
2174         continue;
2175
2176       switch (use->type)
2177         {
2178         case USE_NONLINEAR_EXPR:
2179         case USE_COMPARE:
2180         case USE_ADDRESS:
2181           /* Just add the ivs based on the value of the iv used here.  */
2182           add_iv_value_candidates (data, use->iv, use);
2183           break;
2184
2185         default:
2186           gcc_unreachable ();
2187         }
2188     }
2189 }
2190
2191 /* Record important candidates and add them to related_cands bitmaps
2192    if needed.  */
2193
2194 static void
2195 record_important_candidates (struct ivopts_data *data)
2196 {
2197   unsigned i;
2198   struct iv_use *use;
2199
2200   for (i = 0; i < n_iv_cands (data); i++)
2201     {
2202       struct iv_cand *cand = iv_cand (data, i);
2203
2204       if (cand->important)
2205         bitmap_set_bit (data->important_candidates, i);
2206     }
2207
2208   data->consider_all_candidates = (n_iv_cands (data)
2209                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2210
2211   if (data->consider_all_candidates)
2212     {
2213       /* We will not need "related_cands" bitmaps in this case,
2214          so release them to decrease peak memory consumption.  */
2215       for (i = 0; i < n_iv_uses (data); i++)
2216         {
2217           use = iv_use (data, i);
2218           BITMAP_FREE (use->related_cands);
2219         }
2220     }
2221   else
2222     {
2223       /* Add important candidates to the related_cands bitmaps.  */
2224       for (i = 0; i < n_iv_uses (data); i++)
2225         bitmap_ior_into (iv_use (data, i)->related_cands,
2226                          data->important_candidates);
2227     }
2228 }
2229
2230 /* Finds the candidates for the induction variables.  */
2231
2232 static void
2233 find_iv_candidates (struct ivopts_data *data)
2234 {
2235   /* Add commonly used ivs.  */
2236   add_standard_iv_candidates (data);
2237
2238   /* Add old induction variables.  */
2239   add_old_ivs_candidates (data);
2240
2241   /* Add induction variables derived from uses.  */
2242   add_derived_ivs_candidates (data);
2243
2244   /* Record the important candidates.  */
2245   record_important_candidates (data);
2246 }
2247
2248 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2249    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2250    we allocate a simple list to every use.  */
2251
2252 static void
2253 alloc_use_cost_map (struct ivopts_data *data)
2254 {
2255   unsigned i, size, s, j;
2256
2257   for (i = 0; i < n_iv_uses (data); i++)
2258     {
2259       struct iv_use *use = iv_use (data, i);
2260       bitmap_iterator bi;
2261
2262       if (data->consider_all_candidates)
2263         size = n_iv_cands (data);
2264       else
2265         {
2266           s = 0;
2267           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2268             {
2269               s++;
2270             }
2271
2272           /* Round up to the power of two, so that moduling by it is fast.  */
2273           for (size = 1; size < s; size <<= 1)
2274             continue;
2275         }
2276
2277       use->n_map_members = size;
2278       use->cost_map = XCNEWVEC (struct cost_pair, size);
2279     }
2280 }
2281
2282 /* Returns description of computation cost of expression whose runtime
2283    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2284
2285 static comp_cost
2286 new_cost (unsigned runtime, unsigned complexity)
2287 {
2288   comp_cost cost;
2289
2290   cost.cost = runtime;
2291   cost.complexity = complexity;
2292
2293   return cost;
2294 }
2295
2296 /* Adds costs COST1 and COST2.  */
2297
2298 static comp_cost
2299 add_costs (comp_cost cost1, comp_cost cost2)
2300 {
2301   cost1.cost += cost2.cost;
2302   cost1.complexity += cost2.complexity;
2303
2304   return cost1;
2305 }
2306 /* Subtracts costs COST1 and COST2.  */
2307
2308 static comp_cost
2309 sub_costs (comp_cost cost1, comp_cost cost2)
2310 {
2311   cost1.cost -= cost2.cost;
2312   cost1.complexity -= cost2.complexity;
2313
2314   return cost1;
2315 }
2316
2317 /* Returns a negative number if COST1 < COST2, a positive number if
2318    COST1 > COST2, and 0 if COST1 = COST2.  */
2319
2320 static int
2321 compare_costs (comp_cost cost1, comp_cost cost2)
2322 {
2323   if (cost1.cost == cost2.cost)
2324     return cost1.complexity - cost2.complexity;
2325
2326   return cost1.cost - cost2.cost;
2327 }
2328
2329 /* Returns true if COST is infinite.  */
2330
2331 static bool
2332 infinite_cost_p (comp_cost cost)
2333 {
2334   return cost.cost == INFTY;
2335 }
2336
2337 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2338    on invariants DEPENDS_ON and that the value used in expressing it
2339    is VALUE.*/
2340
2341 static void
2342 set_use_iv_cost (struct ivopts_data *data,
2343                  struct iv_use *use, struct iv_cand *cand,
2344                  comp_cost cost, bitmap depends_on, tree value)
2345 {
2346   unsigned i, s;
2347
2348   if (infinite_cost_p (cost))
2349     {
2350       BITMAP_FREE (depends_on);
2351       return;
2352     }
2353
2354   if (data->consider_all_candidates)
2355     {
2356       use->cost_map[cand->id].cand = cand;
2357       use->cost_map[cand->id].cost = cost;
2358       use->cost_map[cand->id].depends_on = depends_on;
2359       use->cost_map[cand->id].value = value;
2360       return;
2361     }
2362
2363   /* n_map_members is a power of two, so this computes modulo.  */
2364   s = cand->id & (use->n_map_members - 1);
2365   for (i = s; i < use->n_map_members; i++)
2366     if (!use->cost_map[i].cand)
2367       goto found;
2368   for (i = 0; i < s; i++)
2369     if (!use->cost_map[i].cand)
2370       goto found;
2371
2372   gcc_unreachable ();
2373
2374 found:
2375   use->cost_map[i].cand = cand;
2376   use->cost_map[i].cost = cost;
2377   use->cost_map[i].depends_on = depends_on;
2378   use->cost_map[i].value = value;
2379 }
2380
2381 /* Gets cost of (USE, CANDIDATE) pair.  */
2382
2383 static struct cost_pair *
2384 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2385                  struct iv_cand *cand)
2386 {
2387   unsigned i, s;
2388   struct cost_pair *ret;
2389
2390   if (!cand)
2391     return NULL;
2392
2393   if (data->consider_all_candidates)
2394     {
2395       ret = use->cost_map + cand->id;
2396       if (!ret->cand)
2397         return NULL;
2398
2399       return ret;
2400     }
2401
2402   /* n_map_members is a power of two, so this computes modulo.  */
2403   s = cand->id & (use->n_map_members - 1);
2404   for (i = s; i < use->n_map_members; i++)
2405     if (use->cost_map[i].cand == cand)
2406       return use->cost_map + i;
2407
2408   for (i = 0; i < s; i++)
2409     if (use->cost_map[i].cand == cand)
2410       return use->cost_map + i;
2411
2412   return NULL;
2413 }
2414
2415 /* Returns estimate on cost of computing SEQ.  */
2416
2417 static unsigned
2418 seq_cost (rtx seq)
2419 {
2420   unsigned cost = 0;
2421   rtx set;
2422
2423   for (; seq; seq = NEXT_INSN (seq))
2424     {
2425       set = single_set (seq);
2426       if (set)
2427         cost += rtx_cost (set, SET);
2428       else
2429         cost++;
2430     }
2431
2432   return cost;
2433 }
2434
2435 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2436 static rtx
2437 produce_memory_decl_rtl (tree obj, int *regno)
2438 {
2439   rtx x;
2440
2441   gcc_assert (obj);
2442   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2443     {
2444       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2445       x = gen_rtx_SYMBOL_REF (Pmode, name);
2446       SET_SYMBOL_REF_DECL (x, obj);
2447       x = gen_rtx_MEM (DECL_MODE (obj), x);
2448       targetm.encode_section_info (obj, x, true);
2449     }
2450   else
2451     {
2452       x = gen_raw_REG (Pmode, (*regno)++);
2453       x = gen_rtx_MEM (DECL_MODE (obj), x);
2454     }
2455
2456   return x;
2457 }
2458
2459 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2460    walk_tree.  DATA contains the actual fake register number.  */
2461
2462 static tree
2463 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2464 {
2465   tree obj = NULL_TREE;
2466   rtx x = NULL_RTX;
2467   int *regno = (int *) data;
2468
2469   switch (TREE_CODE (*expr_p))
2470     {
2471     case ADDR_EXPR:
2472       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2473            handled_component_p (*expr_p);
2474            expr_p = &TREE_OPERAND (*expr_p, 0))
2475         continue;
2476       obj = *expr_p;
2477       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2478         x = produce_memory_decl_rtl (obj, regno);
2479       break;
2480
2481     case SSA_NAME:
2482       *ws = 0;
2483       obj = SSA_NAME_VAR (*expr_p);
2484       if (!DECL_RTL_SET_P (obj))
2485         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2486       break;
2487
2488     case VAR_DECL:
2489     case PARM_DECL:
2490     case RESULT_DECL:
2491       *ws = 0;
2492       obj = *expr_p;
2493
2494       if (DECL_RTL_SET_P (obj))
2495         break;
2496
2497       if (DECL_MODE (obj) == BLKmode)
2498         x = produce_memory_decl_rtl (obj, regno);
2499       else
2500         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2501
2502       break;
2503
2504     default:
2505       break;
2506     }
2507
2508   if (x)
2509     {
2510       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2511       SET_DECL_RTL (obj, x);
2512     }
2513
2514   return NULL_TREE;
2515 }
2516
2517 /* Determines cost of the computation of EXPR.  */
2518
2519 static unsigned
2520 computation_cost (tree expr)
2521 {
2522   rtx seq, rslt;
2523   tree type = TREE_TYPE (expr);
2524   unsigned cost;
2525   /* Avoid using hard regs in ways which may be unsupported.  */
2526   int regno = LAST_VIRTUAL_REGISTER + 1;
2527
2528   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2529   start_sequence ();
2530   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2531   seq = get_insns ();
2532   end_sequence ();
2533
2534   cost = seq_cost (seq);
2535   if (MEM_P (rslt))
2536     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2537
2538   return cost;
2539 }
2540
2541 /* Returns variable containing the value of candidate CAND at statement AT.  */
2542
2543 static tree
2544 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2545 {
2546   if (stmt_after_increment (loop, cand, stmt))
2547     return cand->var_after;
2548   else
2549     return cand->var_before;
2550 }
2551
2552 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2553    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2554
2555 int
2556 tree_int_cst_sign_bit (const_tree t)
2557 {
2558   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2559   unsigned HOST_WIDE_INT w;
2560
2561   if (bitno < HOST_BITS_PER_WIDE_INT)
2562     w = TREE_INT_CST_LOW (t);
2563   else
2564     {
2565       w = TREE_INT_CST_HIGH (t);
2566       bitno -= HOST_BITS_PER_WIDE_INT;
2567     }
2568
2569   return (w >> bitno) & 1;
2570 }
2571
2572 /* If we can prove that TOP = cst * BOT for some constant cst,
2573    store cst to MUL and return true.  Otherwise return false.
2574    The returned value is always sign-extended, regardless of the
2575    signedness of TOP and BOT.  */
2576
2577 static bool
2578 constant_multiple_of (tree top, tree bot, double_int *mul)
2579 {
2580   tree mby;
2581   enum tree_code code;
2582   double_int res, p0, p1;
2583   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2584
2585   STRIP_NOPS (top);
2586   STRIP_NOPS (bot);
2587
2588   if (operand_equal_p (top, bot, 0))
2589     {
2590       *mul = double_int_one;
2591       return true;
2592     }
2593
2594   code = TREE_CODE (top);
2595   switch (code)
2596     {
2597     case MULT_EXPR:
2598       mby = TREE_OPERAND (top, 1);
2599       if (TREE_CODE (mby) != INTEGER_CST)
2600         return false;
2601
2602       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2603         return false;
2604
2605       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2606                               precision);
2607       return true;
2608
2609     case PLUS_EXPR:
2610     case MINUS_EXPR:
2611       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2612           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2613         return false;
2614
2615       if (code == MINUS_EXPR)
2616         p1 = double_int_neg (p1);
2617       *mul = double_int_sext (double_int_add (p0, p1), precision);
2618       return true;
2619
2620     case INTEGER_CST:
2621       if (TREE_CODE (bot) != INTEGER_CST)
2622         return false;
2623
2624       p0 = double_int_sext (tree_to_double_int (top), precision);
2625       p1 = double_int_sext (tree_to_double_int (bot), precision);
2626       if (double_int_zero_p (p1))
2627         return false;
2628       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2629                               precision);
2630       return double_int_zero_p (res);
2631
2632     default:
2633       return false;
2634     }
2635 }
2636
2637 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2638    same precision that is at least as wide as the precision of TYPE, stores
2639    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2640    type of A and B.  */
2641
2642 static tree
2643 determine_common_wider_type (tree *a, tree *b)
2644 {
2645   tree wider_type = NULL;
2646   tree suba, subb;
2647   tree atype = TREE_TYPE (*a);
2648
2649   if ((TREE_CODE (*a) == NOP_EXPR
2650        || TREE_CODE (*a) == CONVERT_EXPR))
2651     {
2652       suba = TREE_OPERAND (*a, 0);
2653       wider_type = TREE_TYPE (suba);
2654       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2655         return atype;
2656     }
2657   else
2658     return atype;
2659
2660   if ((TREE_CODE (*b) == NOP_EXPR
2661        || TREE_CODE (*b) == CONVERT_EXPR))
2662     {
2663       subb = TREE_OPERAND (*b, 0);
2664       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2665         return atype;
2666     }
2667   else
2668     return atype;
2669
2670   *a = suba;
2671   *b = subb;
2672   return wider_type;
2673 }
2674
2675 /* Determines the expression by that USE is expressed from induction variable
2676    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2677    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2678
2679 static bool
2680 get_computation_aff (struct loop *loop,
2681                      struct iv_use *use, struct iv_cand *cand, tree at,
2682                      struct affine_tree_combination *aff)
2683 {
2684   tree ubase = use->iv->base;
2685   tree ustep = use->iv->step;
2686   tree cbase = cand->iv->base;
2687   tree cstep = cand->iv->step, cstep_common;
2688   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2689   tree common_type, var;
2690   tree uutype;
2691   aff_tree cbase_aff, var_aff;
2692   double_int rat;
2693
2694   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2695     {
2696       /* We do not have a precision to express the values of use.  */
2697       return false;
2698     }
2699
2700   var = var_at_stmt (loop, cand, at);
2701   uutype = unsigned_type_for (utype);
2702
2703   /* If the conversion is not noop, perform it.  */
2704   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2705     {
2706       cstep = fold_convert (uutype, cstep);
2707       cbase = fold_convert (uutype, cbase);
2708       var = fold_convert (uutype, var);
2709     }
2710
2711   if (!constant_multiple_of (ustep, cstep, &rat))
2712     return false;
2713
2714   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2715      type, we achieve better folding by computing their difference in this
2716      wider type, and cast the result to UUTYPE.  We do not need to worry about
2717      overflows, as all the arithmetics will in the end be performed in UUTYPE
2718      anyway.  */
2719   common_type = determine_common_wider_type (&ubase, &cbase);
2720
2721   /* use = ubase - ratio * cbase + ratio * var.  */
2722   tree_to_aff_combination (ubase, common_type, aff);
2723   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2724   tree_to_aff_combination (var, uutype, &var_aff);
2725
2726   /* We need to shift the value if we are after the increment.  */
2727   if (stmt_after_increment (loop, cand, at))
2728     {
2729       aff_tree cstep_aff;
2730
2731       if (common_type != uutype)
2732         cstep_common = fold_convert (common_type, cstep);
2733       else
2734         cstep_common = cstep;
2735
2736       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2737       aff_combination_add (&cbase_aff, &cstep_aff);
2738     }
2739
2740   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2741   aff_combination_add (aff, &cbase_aff);
2742   if (common_type != uutype)
2743     aff_combination_convert (aff, uutype);
2744
2745   aff_combination_scale (&var_aff, rat);
2746   aff_combination_add (aff, &var_aff);
2747
2748   return true;
2749 }
2750
2751 /* Determines the expression by that USE is expressed from induction variable
2752    CAND at statement AT in LOOP.  The computation is unshared.  */
2753
2754 static tree
2755 get_computation_at (struct loop *loop,
2756                     struct iv_use *use, struct iv_cand *cand, tree at)
2757 {
2758   aff_tree aff;
2759   tree type = TREE_TYPE (use->iv->base);
2760
2761   if (!get_computation_aff (loop, use, cand, at, &aff))
2762     return NULL_TREE;
2763   unshare_aff_combination (&aff);
2764   return fold_convert (type, aff_combination_to_tree (&aff));
2765 }
2766
2767 /* Determines the expression by that USE is expressed from induction variable
2768    CAND in LOOP.  The computation is unshared.  */
2769
2770 static tree
2771 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2772 {
2773   return get_computation_at (loop, use, cand, use->stmt);
2774 }
2775
2776 /* Returns cost of addition in MODE.  */
2777
2778 static unsigned
2779 add_cost (enum machine_mode mode)
2780 {
2781   static unsigned costs[NUM_MACHINE_MODES];
2782   rtx seq;
2783   unsigned cost;
2784
2785   if (costs[mode])
2786     return costs[mode];
2787
2788   start_sequence ();
2789   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2790                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2791                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2792                  NULL_RTX);
2793   seq = get_insns ();
2794   end_sequence ();
2795
2796   cost = seq_cost (seq);
2797   if (!cost)
2798     cost = 1;
2799
2800   costs[mode] = cost;
2801
2802   if (dump_file && (dump_flags & TDF_DETAILS))
2803     fprintf (dump_file, "Addition in %s costs %d\n",
2804              GET_MODE_NAME (mode), cost);
2805   return cost;
2806 }
2807
2808 /* Entry in a hashtable of already known costs for multiplication.  */
2809 struct mbc_entry
2810 {
2811   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2812   enum machine_mode mode;       /* In mode.  */
2813   unsigned cost;                /* The cost.  */
2814 };
2815
2816 /* Counts hash value for the ENTRY.  */
2817
2818 static hashval_t
2819 mbc_entry_hash (const void *entry)
2820 {
2821   const struct mbc_entry *e = (const struct mbc_entry *) entry;
2822
2823   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2824 }
2825
2826 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2827
2828 static int
2829 mbc_entry_eq (const void *entry1, const void *entry2)
2830 {
2831   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2832   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2833
2834   return (e1->mode == e2->mode
2835           && e1->cst == e2->cst);
2836 }
2837
2838 /* Returns cost of multiplication by constant CST in MODE.  */
2839
2840 unsigned
2841 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2842 {
2843   static htab_t costs;
2844   struct mbc_entry **cached, act;
2845   rtx seq;
2846   unsigned cost;
2847
2848   if (!costs)
2849     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2850
2851   act.mode = mode;
2852   act.cst = cst;
2853   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2854   if (*cached)
2855     return (*cached)->cost;
2856
2857   *cached = XNEW (struct mbc_entry);
2858   (*cached)->mode = mode;
2859   (*cached)->cst = cst;
2860
2861   start_sequence ();
2862   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2863                gen_int_mode (cst, mode), NULL_RTX, 0);
2864   seq = get_insns ();
2865   end_sequence ();
2866
2867   cost = seq_cost (seq);
2868
2869   if (dump_file && (dump_flags & TDF_DETAILS))
2870     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2871              (int) cst, GET_MODE_NAME (mode), cost);
2872
2873   (*cached)->cost = cost;
2874
2875   return cost;
2876 }
2877
2878 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2879    validity for a memory reference accessing memory of mode MODE.  */
2880
2881 bool
2882 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2883 {
2884 #define MAX_RATIO 128
2885   static sbitmap valid_mult[MAX_MACHINE_MODE];
2886
2887   if (!valid_mult[mode])
2888     {
2889       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2890       rtx addr;
2891       HOST_WIDE_INT i;
2892
2893       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2894       sbitmap_zero (valid_mult[mode]);
2895       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2896       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2897         {
2898           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2899           if (memory_address_p (mode, addr))
2900             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2901         }
2902
2903       if (dump_file && (dump_flags & TDF_DETAILS))
2904         {
2905           fprintf (dump_file, "  allowed multipliers:");
2906           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2907             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2908               fprintf (dump_file, " %d", (int) i);
2909           fprintf (dump_file, "\n");
2910           fprintf (dump_file, "\n");
2911         }
2912     }
2913
2914   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2915     return false;
2916
2917   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2918 }
2919
2920 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2921    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2922    variable is omitted.  Compute the cost for a memory reference that accesses
2923    a memory location of mode MEM_MODE.
2924
2925    TODO -- there must be some better way.  This all is quite crude.  */
2926
2927 static comp_cost
2928 get_address_cost (bool symbol_present, bool var_present,
2929                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2930                   enum machine_mode mem_mode)
2931 {
2932   static bool initialized[MAX_MACHINE_MODE];
2933   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2934   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2935   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2936   unsigned cost, acost, complexity;
2937   bool offset_p, ratio_p;
2938   HOST_WIDE_INT s_offset;
2939   unsigned HOST_WIDE_INT mask;
2940   unsigned bits;
2941
2942   if (!initialized[mem_mode])
2943     {
2944       HOST_WIDE_INT i;
2945       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2946       int old_cse_not_expected;
2947       unsigned sym_p, var_p, off_p, rat_p, add_c;
2948       rtx seq, addr, base;
2949       rtx reg0, reg1;
2950
2951       initialized[mem_mode] = true;
2952
2953       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2954
2955       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2956       for (i = start; i <= 1 << 20; i <<= 1)
2957         {
2958           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2959           if (!memory_address_p (mem_mode, addr))
2960             break;
2961         }
2962       max_offset[mem_mode] = i == start ? 0 : i >> 1;
2963       off[mem_mode] = max_offset[mem_mode];
2964
2965       for (i = start; i <= 1 << 20; i <<= 1)
2966         {
2967           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2968           if (!memory_address_p (mem_mode, addr))
2969             break;
2970         }
2971       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2972
2973       if (dump_file && (dump_flags & TDF_DETAILS))
2974         {
2975           fprintf (dump_file, "get_address_cost:\n");
2976           fprintf (dump_file, "  min offset %s %d\n",
2977                    GET_MODE_NAME (mem_mode),
2978                    (int) min_offset[mem_mode]);
2979           fprintf (dump_file, "  max offset %s %d\n",
2980                    GET_MODE_NAME (mem_mode),
2981                    (int) max_offset[mem_mode]);
2982         }
2983
2984       rat[mem_mode] = 1;
2985       for (i = 2; i <= MAX_RATIO; i++)
2986         if (multiplier_allowed_in_address_p (i, mem_mode))
2987           {
2988             rat[mem_mode] = i;
2989             break;
2990           }
2991
2992       /* Compute the cost of various addressing modes.  */
2993       acost = 0;
2994       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2995       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2996
2997       for (i = 0; i < 16; i++)
2998         {
2999           sym_p = i & 1;
3000           var_p = (i >> 1) & 1;
3001           off_p = (i >> 2) & 1;
3002           rat_p = (i >> 3) & 1;
3003
3004           addr = reg0;
3005           if (rat_p)
3006             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3007                                    gen_int_mode (rat[mem_mode], Pmode));
3008
3009           if (var_p)
3010             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3011
3012           if (sym_p)
3013             {
3014               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3015               /* ??? We can run into trouble with some backends by presenting
3016                  it with symbols which havn't been properly passed through
3017                  targetm.encode_section_info.  By setting the local bit, we
3018                  enhance the probability of things working.  */
3019               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3020
3021               if (off_p)
3022                 base = gen_rtx_fmt_e (CONST, Pmode,
3023                                       gen_rtx_fmt_ee (PLUS, Pmode,
3024                                                       base,
3025                                                       gen_int_mode (off[mem_mode],
3026                                                                     Pmode)));
3027             }
3028           else if (off_p)
3029             base = gen_int_mode (off[mem_mode], Pmode);
3030           else
3031             base = NULL_RTX;
3032
3033           if (base)
3034             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3035
3036           start_sequence ();
3037           /* To avoid splitting addressing modes, pretend that no cse will
3038              follow.  */
3039           old_cse_not_expected = cse_not_expected;
3040           cse_not_expected = true;
3041           addr = memory_address (mem_mode, addr);
3042           cse_not_expected = old_cse_not_expected;
3043           seq = get_insns ();
3044           end_sequence ();
3045
3046           acost = seq_cost (seq);
3047           acost += address_cost (addr, mem_mode);
3048
3049           if (!acost)
3050             acost = 1;
3051           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3052         }
3053
3054       /* On some targets, it is quite expensive to load symbol to a register,
3055          which makes addresses that contain symbols look much more expensive.
3056          However, the symbol will have to be loaded in any case before the
3057          loop (and quite likely we have it in register already), so it does not
3058          make much sense to penalize them too heavily.  So make some final
3059          tweaks for the SYMBOL_PRESENT modes:
3060
3061          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3062          var is cheaper, use this mode with small penalty.
3063          If VAR_PRESENT is true, try whether the mode with
3064          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3065          if this is the case, use it.  */
3066       add_c = add_cost (Pmode);
3067       for (i = 0; i < 8; i++)
3068         {
3069           var_p = i & 1;
3070           off_p = (i >> 1) & 1;
3071           rat_p = (i >> 2) & 1;
3072
3073           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3074           if (var_p)
3075             acost += add_c;
3076
3077           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3078             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3079         }
3080
3081       if (dump_file && (dump_flags & TDF_DETAILS))
3082         {
3083           fprintf (dump_file, "Address costs:\n");
3084
3085           for (i = 0; i < 16; i++)
3086             {
3087               sym_p = i & 1;
3088               var_p = (i >> 1) & 1;
3089               off_p = (i >> 2) & 1;
3090               rat_p = (i >> 3) & 1;
3091
3092               fprintf (dump_file, "  ");
3093               if (sym_p)
3094                 fprintf (dump_file, "sym + ");
3095               if (var_p)
3096                 fprintf (dump_file, "var + ");
3097               if (off_p)
3098                 fprintf (dump_file, "cst + ");
3099               if (rat_p)
3100                 fprintf (dump_file, "rat * ");
3101
3102               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3103               fprintf (dump_file, "index costs %d\n", acost);
3104             }
3105           fprintf (dump_file, "\n");
3106         }
3107     }
3108
3109   bits = GET_MODE_BITSIZE (Pmode);
3110   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3111   offset &= mask;
3112   if ((offset >> (bits - 1) & 1))
3113     offset |= ~mask;
3114   s_offset = offset;
3115
3116   cost = 0;
3117   offset_p = (s_offset != 0
3118               && min_offset[mem_mode] <= s_offset
3119               && s_offset <= max_offset[mem_mode]);
3120   ratio_p = (ratio != 1
3121              && multiplier_allowed_in_address_p (ratio, mem_mode));
3122
3123   if (ratio != 1 && !ratio_p)
3124     cost += multiply_by_cost (ratio, Pmode);
3125
3126   if (s_offset && !offset_p && !symbol_present)
3127     cost += add_cost (Pmode);
3128
3129   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3130   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3131   return new_cost (cost + acost, complexity);
3132 }
3133
3134 /* Estimates cost of forcing expression EXPR into a variable.  */
3135
3136 static comp_cost
3137 force_expr_to_var_cost (tree expr)
3138 {
3139   static bool costs_initialized = false;
3140   static unsigned integer_cost;
3141   static unsigned symbol_cost;
3142   static unsigned address_cost;
3143   tree op0, op1;
3144   comp_cost cost0, cost1, cost;
3145   enum machine_mode mode;
3146
3147   if (!costs_initialized)
3148     {
3149       tree type = build_pointer_type (integer_type_node);
3150       tree var, addr;
3151       rtx x;
3152
3153       var = create_tmp_var_raw (integer_type_node, "test_var");
3154       TREE_STATIC (var) = 1;
3155       x = produce_memory_decl_rtl (var, NULL);
3156       SET_DECL_RTL (var, x);
3157
3158       integer_cost = computation_cost (build_int_cst (integer_type_node,
3159                                                       2000));
3160
3161       addr = build1 (ADDR_EXPR, type, var);
3162       symbol_cost = computation_cost (addr) + 1;
3163
3164       address_cost
3165         = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3166                                     addr,
3167                                     build_int_cst (sizetype, 2000))) + 1;
3168       if (dump_file && (dump_flags & TDF_DETAILS))
3169         {
3170           fprintf (dump_file, "force_expr_to_var_cost:\n");
3171           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3172           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3173           fprintf (dump_file, "  address %d\n", (int) address_cost);
3174           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3175           fprintf (dump_file, "\n");
3176         }
3177
3178       costs_initialized = true;
3179     }
3180
3181   STRIP_NOPS (expr);
3182
3183   if (SSA_VAR_P (expr))
3184     return zero_cost;
3185
3186   if (TREE_INVARIANT (expr))
3187     {
3188       if (TREE_CODE (expr) == INTEGER_CST)
3189         return new_cost (integer_cost, 0);
3190
3191       if (TREE_CODE (expr) == ADDR_EXPR)
3192         {
3193           tree obj = TREE_OPERAND (expr, 0);
3194
3195           if (TREE_CODE (obj) == VAR_DECL
3196               || TREE_CODE (obj) == PARM_DECL
3197               || TREE_CODE (obj) == RESULT_DECL)
3198             return new_cost (symbol_cost, 0);
3199         }
3200
3201       return new_cost (address_cost, 0);
3202     }
3203
3204   switch (TREE_CODE (expr))
3205     {
3206     case POINTER_PLUS_EXPR:
3207     case PLUS_EXPR:
3208     case MINUS_EXPR:
3209     case MULT_EXPR:
3210       op0 = TREE_OPERAND (expr, 0);
3211       op1 = TREE_OPERAND (expr, 1);
3212       STRIP_NOPS (op0);
3213       STRIP_NOPS (op1);
3214
3215       if (is_gimple_val (op0))
3216         cost0 = zero_cost;
3217       else
3218         cost0 = force_expr_to_var_cost (op0);
3219
3220       if (is_gimple_val (op1))
3221         cost1 = zero_cost;
3222       else
3223         cost1 = force_expr_to_var_cost (op1);
3224
3225       break;
3226
3227     default:
3228       /* Just an arbitrary value, FIXME.  */
3229       return new_cost (target_spill_cost, 0);
3230     }
3231
3232   mode = TYPE_MODE (TREE_TYPE (expr));
3233   switch (TREE_CODE (expr))
3234     {
3235     case POINTER_PLUS_EXPR:
3236     case PLUS_EXPR:
3237     case MINUS_EXPR:
3238       cost = new_cost (add_cost (mode), 0);
3239       break;
3240
3241     case MULT_EXPR:
3242       if (cst_and_fits_in_hwi (op0))
3243         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3244       else if (cst_and_fits_in_hwi (op1))
3245         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3246       else
3247         return new_cost (target_spill_cost, 0);
3248       break;
3249
3250     default:
3251       gcc_unreachable ();
3252     }
3253
3254   cost = add_costs (cost, cost0);
3255   cost = add_costs (cost, cost1);
3256
3257   /* Bound the cost by target_spill_cost.  The parts of complicated
3258      computations often are either loop invariant or at least can
3259      be shared between several iv uses, so letting this grow without
3260      limits would not give reasonable results.  */
3261   if (cost.cost > target_spill_cost)
3262     cost.cost = target_spill_cost;
3263
3264   return cost;
3265 }
3266
3267 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3268    invariants the computation depends on.  */
3269
3270 static comp_cost
3271 force_var_cost (struct ivopts_data *data,
3272                 tree expr, bitmap *depends_on)
3273 {
3274   if (depends_on)
3275     {
3276       fd_ivopts_data = data;
3277       walk_tree (&expr, find_depends, depends_on, NULL);
3278     }
3279
3280   return force_expr_to_var_cost (expr);
3281 }
3282
3283 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3284    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3285    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3286    invariants the computation depends on.  */
3287
3288 static comp_cost
3289 split_address_cost (struct ivopts_data *data,
3290                     tree addr, bool *symbol_present, bool *var_present,
3291                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3292 {
3293   tree core;
3294   HOST_WIDE_INT bitsize;
3295   HOST_WIDE_INT bitpos;
3296   tree toffset;
3297   enum machine_mode mode;
3298   int unsignedp, volatilep;
3299
3300   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3301                               &unsignedp, &volatilep, false);
3302
3303   if (toffset != 0
3304       || bitpos % BITS_PER_UNIT != 0
3305       || TREE_CODE (core) != VAR_DECL)
3306     {
3307       *symbol_present = false;
3308       *var_present = true;
3309       fd_ivopts_data = data;
3310       walk_tree (&addr, find_depends, depends_on, NULL);
3311       return new_cost (target_spill_cost, 0);
3312     }
3313
3314   *offset += bitpos / BITS_PER_UNIT;
3315   if (TREE_STATIC (core)
3316       || DECL_EXTERNAL (core))
3317     {
3318       *symbol_present = true;
3319       *var_present = false;
3320       return zero_cost;
3321     }
3322
3323   *symbol_present = false;
3324   *var_present = true;
3325   return zero_cost;
3326 }
3327
3328 /* Estimates cost of expressing difference of addresses E1 - E2 as
3329    var + symbol + offset.  The value of offset is added to OFFSET,
3330    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3331    part is missing.  DEPENDS_ON is a set of the invariants the computation
3332    depends on.  */
3333
3334 static comp_cost
3335 ptr_difference_cost (struct ivopts_data *data,
3336                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3337                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3338 {
3339   HOST_WIDE_INT diff = 0;
3340   comp_cost cost;
3341
3342   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3343
3344   if (ptr_difference_const (e1, e2, &diff))
3345     {
3346       *offset += diff;
3347       *symbol_present = false;
3348       *var_present = false;
3349       return zero_cost;
3350     }
3351
3352   if (integer_zerop (e2))
3353     return split_address_cost (data, TREE_OPERAND (e1, 0),
3354                                symbol_present, var_present, offset, depends_on);
3355
3356   *symbol_present = false;
3357   *var_present = true;
3358
3359   cost = force_var_cost (data, e1, depends_on);
3360   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3361   cost.cost += add_cost (Pmode);
3362
3363   return cost;
3364 }
3365
3366 /* Estimates cost of expressing difference E1 - E2 as
3367    var + symbol + offset.  The value of offset is added to OFFSET,
3368    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3369    part is missing.  DEPENDS_ON is a set of the invariants the computation
3370    depends on.  */
3371
3372 static comp_cost
3373 difference_cost (struct ivopts_data *data,
3374                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3375                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3376 {
3377   comp_cost cost;
3378   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3379   unsigned HOST_WIDE_INT off1, off2;
3380
3381   e1 = strip_offset (e1, &off1);
3382   e2 = strip_offset (e2, &off2);
3383   *offset += off1 - off2;
3384
3385   STRIP_NOPS (e1);
3386   STRIP_NOPS (e2);
3387
3388   if (TREE_CODE (e1) == ADDR_EXPR)
3389     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3390                                 depends_on);
3391   *symbol_present = false;
3392
3393   if (operand_equal_p (e1, e2, 0))
3394     {
3395       *var_present = false;
3396       return zero_cost;
3397     }
3398   *var_present = true;
3399   if (integer_zerop (e2))
3400     return force_var_cost (data, e1, depends_on);
3401
3402   if (integer_zerop (e1))
3403     {
3404       cost = force_var_cost (data, e2, depends_on);
3405       cost.cost += multiply_by_cost (-1, mode);
3406
3407       return cost;
3408     }
3409
3410   cost = force_var_cost (data, e1, depends_on);
3411   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3412   cost.cost += add_cost (mode);
3413
3414   return cost;
3415 }
3416
3417 /* Determines the cost of the computation by that USE is expressed
3418    from induction variable CAND.  If ADDRESS_P is true, we just need
3419    to create an address from it, otherwise we want to get it into
3420    register.  A set of invariants we depend on is stored in
3421    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3422
3423 static comp_cost
3424 get_computation_cost_at (struct ivopts_data *data,
3425                          struct iv_use *use, struct iv_cand *cand,
3426                          bool address_p, bitmap *depends_on, tree at)
3427 {
3428   tree ubase = use->iv->base, ustep = use->iv->step;
3429   tree cbase, cstep;
3430   tree utype = TREE_TYPE (ubase), ctype;
3431   unsigned HOST_WIDE_INT cstepi, offset = 0;
3432   HOST_WIDE_INT ratio, aratio;
3433   bool var_present, symbol_present;
3434   comp_cost cost;
3435   unsigned n_sums;
3436   double_int rat;
3437
3438   *depends_on = NULL;
3439
3440   /* Only consider real candidates.  */
3441   if (!cand->iv)
3442     return infinite_cost;
3443
3444   cbase = cand->iv->base;
3445   cstep = cand->iv->step;
3446   ctype = TREE_TYPE (cbase);
3447
3448   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3449     {
3450       /* We do not have a precision to express the values of use.  */
3451       return infinite_cost;
3452     }
3453
3454   if (address_p)
3455     {
3456       /* Do not try to express address of an object with computation based
3457          on address of a different object.  This may cause problems in rtl
3458          level alias analysis (that does not expect this to be happening,
3459          as this is illegal in C), and would be unlikely to be useful
3460          anyway.  */
3461       if (use->iv->base_object
3462           && cand->iv->base_object
3463           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3464         return infinite_cost;
3465     }
3466
3467   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3468     {
3469       /* TODO -- add direct handling of this case.  */
3470       goto fallback;
3471     }
3472
3473   /* CSTEPI is removed from the offset in case statement is after the
3474      increment.  If the step is not constant, we use zero instead.
3475      This is a bit imprecise (there is the extra addition), but
3476      redundancy elimination is likely to transform the code so that
3477      it uses value of the variable before increment anyway,
3478      so it is not that much unrealistic.  */
3479   if (cst_and_fits_in_hwi (cstep))
3480     cstepi = int_cst_value (cstep);
3481   else
3482     cstepi = 0;
3483
3484   if (!constant_multiple_of (ustep, cstep, &rat))
3485     return infinite_cost;
3486
3487   if (double_int_fits_in_shwi_p (rat))
3488     ratio = double_int_to_shwi (rat);
3489   else
3490     return infinite_cost;
3491
3492   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3493      or ratio == 1, it is better to handle this like
3494
3495      ubase - ratio * cbase + ratio * var
3496
3497      (also holds in the case ratio == -1, TODO.  */
3498
3499   if (cst_and_fits_in_hwi (cbase))
3500     {
3501       offset = - ratio * int_cst_value (cbase);
3502       cost = difference_cost (data,
3503                               ubase, build_int_cst (utype, 0),
3504                               &symbol_present, &var_present, &offset,
3505                               depends_on);
3506     }
3507   else if (ratio == 1)
3508     {
3509       cost = difference_cost (data,
3510                               ubase, cbase,
3511                               &symbol_present, &var_present, &offset,
3512                               depends_on);
3513     }
3514   else
3515     {
3516       cost = force_var_cost (data, cbase, depends_on);
3517       cost.cost += add_cost (TYPE_MODE (ctype));
3518       cost = add_costs (cost,
3519                         difference_cost (data,
3520                                          ubase, build_int_cst (utype, 0),
3521                                          &symbol_present, &var_present,
3522                                          &offset, depends_on));
3523     }
3524
3525   /* If we are after the increment, the value of the candidate is higher by
3526      one iteration.  */
3527   if (stmt_after_increment (data->current_loop, cand, at))
3528     offset -= ratio * cstepi;
3529
3530   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3531      (symbol/var/const parts may be omitted).  If we are looking for an address,
3532      find the cost of addressing this.  */
3533   if (address_p)
3534     return add_costs (cost, get_address_cost (symbol_present, var_present,
3535                                 offset, ratio,
3536                                 TYPE_MODE (TREE_TYPE (*use->op_p))));
3537
3538   /* Otherwise estimate the costs for computing the expression.  */
3539   aratio = ratio > 0 ? ratio : -ratio;
3540   if (!symbol_present && !var_present && !offset)
3541     {
3542       if (ratio != 1)
3543         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3544
3545       return cost;
3546     }
3547
3548   if (aratio != 1)
3549     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3550
3551   n_sums = 1;
3552   if (var_present
3553       /* Symbol + offset should be compile-time computable.  */
3554       && (symbol_present || offset))
3555     n_sums++;
3556
3557   /* Having offset does not affect runtime cost in case it is added to
3558      symbol, but it increases complexity.  */
3559   if (offset)
3560     cost.complexity++;
3561
3562   cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3563   return cost;
3564
3565 fallback:
3566   {
3567     /* Just get the expression, expand it and measure the cost.  */
3568     tree comp = get_computation_at (data->current_loop, use, cand, at);
3569
3570     if (!comp)
3571       return infinite_cost;
3572
3573     if (address_p)
3574       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3575
3576     return new_cost (computation_cost (comp), 0);
3577   }
3578 }
3579
3580 /* Determines the cost of the computation by that USE is expressed
3581    from induction variable CAND.  If ADDRESS_P is true, we just need
3582    to create an address from it, otherwise we want to get it into
3583    register.  A set of invariants we depend on is stored in
3584    DEPENDS_ON.  */
3585
3586 static comp_cost
3587 get_computation_cost (struct ivopts_data *data,
3588                       struct iv_use *use, struct iv_cand *cand,
3589                       bool address_p, bitmap *depends_on)
3590 {
3591   return get_computation_cost_at (data,
3592                                   use, cand, address_p, depends_on, use->stmt);
3593 }
3594
3595 /* Determines cost of basing replacement of USE on CAND in a generic
3596    expression.  */
3597
3598 static bool
3599 determine_use_iv_cost_generic (struct ivopts_data *data,
3600                                struct iv_use *use, struct iv_cand *cand)
3601 {
3602   bitmap depends_on;
3603   comp_cost cost;
3604
3605   /* The simple case first -- if we need to express value of the preserved
3606      original biv, the cost is 0.  This also prevents us from counting the
3607      cost of increment twice -- once at this use and once in the cost of
3608      the candidate.  */
3609   if (cand->pos == IP_ORIGINAL
3610       && cand->incremented_at == use->stmt)
3611     {
3612       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3613       return true;
3614     }
3615
3616   cost = get_computation_cost (data, use, cand, false, &depends_on);
3617   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3618
3619   return !infinite_cost_p (cost);
3620 }
3621
3622 /* Determines cost of basing replacement of USE on CAND in an address.  */
3623
3624 static bool
3625 determine_use_iv_cost_address (struct ivopts_data *data,
3626                                struct iv_use *use, struct iv_cand *cand)
3627 {
3628   bitmap depends_on;
3629   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3630
3631   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3632
3633   return !infinite_cost_p (cost);
3634 }
3635
3636 /* Computes value of candidate CAND at position AT in iteration NITER, and
3637    stores it to VAL.  */
3638
3639 static void
3640 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3641                aff_tree *val)
3642 {
3643   aff_tree step, delta, nit;
3644   struct iv *iv = cand->iv;
3645   tree type = TREE_TYPE (iv->base);
3646
3647   tree_to_aff_combination (iv->step, type, &step);
3648   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3649   aff_combination_convert (&nit, type);
3650   aff_combination_mult (&nit, &step, &delta);
3651   if (stmt_after_increment (loop, cand, at))
3652     aff_combination_add (&delta, &step);
3653
3654   tree_to_aff_combination (iv->base, type, val);
3655   aff_combination_add (val, &delta);
3656 }
3657
3658 /* Returns period of induction variable iv.  */
3659
3660 static tree
3661 iv_period (struct iv *iv)
3662 {
3663   tree step = iv->step, period, type;
3664   tree pow2div;
3665
3666   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3667
3668   /* Period of the iv is gcd (step, type range).  Since type range is power
3669      of two, it suffices to determine the maximum power of two that divides
3670      step.  */
3671   pow2div = num_ending_zeros (step);
3672   type = unsigned_type_for (TREE_TYPE (step));
3673
3674   period = build_low_bits_mask (type,
3675                                 (TYPE_PRECISION (type)
3676                                  - tree_low_cst (pow2div, 1)));
3677
3678   return period;
3679 }
3680
3681 /* Returns the comparison operator used when eliminating the iv USE.  */
3682
3683 static enum tree_code
3684 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3685 {
3686   struct loop *loop = data->current_loop;
3687   basic_block ex_bb;
3688   edge exit;
3689
3690   ex_bb = bb_for_stmt (use->stmt);
3691   exit = EDGE_SUCC (ex_bb, 0);
3692   if (flow_bb_inside_loop_p (loop, exit->dest))
3693     exit = EDGE_SUCC (ex_bb, 1);
3694
3695   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3696 }
3697
3698 /* Check whether it is possible to express the condition in USE by comparison
3699    of candidate CAND.  If so, store the value compared with to BOUND.  */
3700
3701 static bool
3702 may_eliminate_iv (struct ivopts_data *data,
3703                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3704 {
3705   basic_block ex_bb;
3706   edge exit;
3707   tree nit, period;
3708   struct loop *loop = data->current_loop;
3709   aff_tree bnd;
3710   double_int period_value, max_niter;
3711
3712   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3713     return false;
3714
3715   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3716      for other conditions inside loop body.  */
3717   ex_bb = bb_for_stmt (use->stmt);
3718   if (use->stmt != last_stmt (ex_bb)
3719       || TREE_CODE (use->stmt) != COND_EXPR)
3720     return false;
3721   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3722     return false;
3723
3724   exit = EDGE_SUCC (ex_bb, 0);
3725   if (flow_bb_inside_loop_p (loop, exit->dest))
3726     exit = EDGE_SUCC (ex_bb, 1);
3727   if (flow_bb_inside_loop_p (loop, exit->dest))
3728     return false;
3729
3730   nit = niter_for_exit (data, exit);
3731   if (!nit)
3732     return false;
3733
3734   /* Determine whether we may use the variable to test whether niter iterations
3735      elapsed.  This is the case iff the period of the induction variable is
3736      greater than the number of iterations.  */
3737   period = iv_period (cand->iv);
3738   if (!period)
3739     return false;
3740
3741   /* Compare the period with the estimate on the number of iterations of the
3742      loop.  */
3743   if (!estimated_loop_iterations (loop, true, &max_niter))
3744     return false;
3745   period_value = tree_to_double_int (period);
3746   if (double_int_ucmp (period_value, max_niter) <= 0)
3747     return false;
3748
3749   cand_value_at (loop, cand, use->stmt, nit, &bnd);
3750   *bound = aff_combination_to_tree (&bnd);
3751   return true;
3752 }
3753
3754 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3755
3756 static bool
3757 determine_use_iv_cost_condition (struct ivopts_data *data,
3758                                  struct iv_use *use, struct iv_cand *cand)
3759 {
3760   tree bound = NULL_TREE;
3761   struct iv *cmp_iv;
3762   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3763   comp_cost elim_cost, express_cost, cost;
3764   bool ok;
3765
3766   /* Only consider real candidates.  */
3767   if (!cand->iv)
3768     {
3769       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3770       return false;
3771     }
3772
3773   /* Try iv elimination.  */
3774   if (may_eliminate_iv (data, use, cand, &bound))
3775     {
3776       elim_cost = force_var_cost (data, bound, &depends_on_elim);
3777       /* The bound is a loop invariant, so it will be only computed
3778          once.  */
3779       elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3780     }
3781   else
3782     elim_cost = infinite_cost;
3783
3784   /* Try expressing the original giv.  If it is compared with an invariant,
3785      note that we cannot get rid of it.  */
3786   ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3787   gcc_assert (ok);
3788
3789   express_cost = get_computation_cost (data, use, cand, false,
3790                                        &depends_on_express);
3791   fd_ivopts_data = data;
3792   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3793
3794   /* Choose the better approach.  */
3795   if (compare_costs (elim_cost, express_cost) < 0)
3796     {
3797       cost = elim_cost;
3798       depends_on = depends_on_elim;
3799       depends_on_elim = NULL;
3800     }
3801   else
3802     {
3803       cost = express_cost;
3804       depends_on = depends_on_express;
3805       depends_on_express = NULL;
3806       bound = NULL_TREE;
3807     }
3808
3809   set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3810
3811   if (depends_on_elim)
3812     BITMAP_FREE (depends_on_elim);
3813   if (depends_on_express)
3814     BITMAP_FREE (depends_on_express);
3815
3816   return !infinite_cost_p (cost);
3817 }
3818
3819 /* Determines cost of basing replacement of USE on CAND.  Returns false
3820    if USE cannot be based on CAND.  */
3821
3822 static bool
3823 determine_use_iv_cost (struct ivopts_data *data,
3824                        struct iv_use *use, struct iv_cand *cand)
3825 {
3826   switch (use->type)
3827     {
3828     case USE_NONLINEAR_EXPR:
3829       return determine_use_iv_cost_generic (data, use, cand);
3830
3831     case USE_ADDRESS:
3832       return determine_use_iv_cost_address (data, use, cand);
3833
3834     case USE_COMPARE:
3835       return determine_use_iv_cost_condition (data, use, cand);
3836
3837     default:
3838       gcc_unreachable ();
3839     }
3840 }
3841
3842 /* Determines costs of basing the use of the iv on an iv candidate.  */
3843
3844 static void
3845 determine_use_iv_costs (struct ivopts_data *data)
3846 {
3847   unsigned i, j;
3848   struct iv_use *use;
3849   struct iv_cand *cand;
3850   bitmap to_clear = BITMAP_ALLOC (NULL);
3851
3852   alloc_use_cost_map (data);
3853
3854   for (i = 0; i < n_iv_uses (data); i++)
3855     {
3856       use = iv_use (data, i);
3857
3858       if (data->consider_all_candidates)
3859         {
3860           for (j = 0; j < n_iv_cands (data); j++)
3861             {
3862               cand = iv_cand (data, j);
3863               determine_use_iv_cost (data, use, cand);
3864             }
3865         }
3866       else
3867         {
3868           bitmap_iterator bi;
3869
3870           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3871             {
3872               cand = iv_cand (data, j);
3873               if (!determine_use_iv_cost (data, use, cand))
3874                 bitmap_set_bit (to_clear, j);
3875             }
3876
3877           /* Remove the candidates for that the cost is infinite from
3878              the list of related candidates.  */
3879           bitmap_and_compl_into (use->related_cands, to_clear);
3880           bitmap_clear (to_clear);
3881         }
3882     }
3883
3884   BITMAP_FREE (to_clear);
3885
3886   if (dump_file && (dump_flags & TDF_DETAILS))
3887     {
3888       fprintf (dump_file, "Use-candidate costs:\n");
3889
3890       for (i = 0; i < n_iv_uses (data); i++)
3891         {
3892           use = iv_use (data, i);
3893
3894           fprintf (dump_file, "Use %d:\n", i);
3895           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
3896           for (j = 0; j < use->n_map_members; j++)
3897             {
3898               if (!use->cost_map[j].cand
3899                   || infinite_cost_p (use->cost_map[j].cost))
3900                 continue;
3901
3902               fprintf (dump_file, "  %d\t%d\t%d\t",
3903                        use->cost_map[j].cand->id,
3904                        use->cost_map[j].cost.cost,
3905                        use->cost_map[j].cost.complexity);
3906               if (use->cost_map[j].depends_on)
3907                 bitmap_print (dump_file,
3908                               use->cost_map[j].depends_on, "","");
3909               fprintf (dump_file, "\n");
3910             }
3911
3912           fprintf (dump_file, "\n");
3913         }
3914       fprintf (dump_file, "\n");
3915     }
3916 }
3917
3918 /* Determines cost of the candidate CAND.  */
3919
3920 static void
3921 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3922 {
3923   comp_cost cost_base;
3924   unsigned cost, cost_step;
3925   tree base;
3926
3927   if (!cand->iv)
3928     {
3929       cand->cost = 0;
3930       return;
3931     }
3932
3933   /* There are two costs associated with the candidate -- its increment
3934      and its initialization.  The second is almost negligible for any loop
3935      that rolls enough, so we take it just very little into account.  */
3936
3937   base = cand->iv->base;
3938   cost_base = force_var_cost (data, base, NULL);
3939   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3940
3941   cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
3942
3943   /* Prefer the original ivs unless we may gain something by replacing it.
3944      The reason is to makee debugging simpler; so this is not relevant for
3945      artificial ivs created by other optimization passes.  */
3946   if (cand->pos != IP_ORIGINAL
3947       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3948     cost++;
3949
3950   /* Prefer not to insert statements into latch unless there are some
3951      already (so that we do not create unnecessary jumps).  */
3952   if (cand->pos == IP_END
3953       && empty_block_p (ip_end_pos (data->current_loop)))
3954     cost++;
3955
3956   cand->cost = cost;
3957 }
3958
3959 /* Determines costs of computation of the candidates.  */
3960
3961 static void
3962 determine_iv_costs (struct ivopts_data *data)
3963 {
3964   unsigned i;
3965
3966   if (dump_file && (dump_flags & TDF_DETAILS))
3967     {
3968       fprintf (dump_file, "Candidate costs:\n");
3969       fprintf (dump_file, "  cand\tcost\n");
3970     }
3971
3972   for (i = 0; i < n_iv_cands (data); i++)
3973     {
3974       struct iv_cand *cand = iv_cand (data, i);
3975
3976       determine_iv_cost (data, cand);
3977
3978       if (dump_file && (dump_flags & TDF_DETAILS))
3979         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
3980     }
3981
3982   if (dump_file && (dump_flags & TDF_DETAILS))
3983     fprintf (dump_file, "\n");
3984 }
3985
3986 /* Calculates cost for having SIZE induction variables.  */
3987
3988 static unsigned
3989 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3990 {
3991   /* We add size to the cost, so that we prefer eliminating ivs
3992      if possible.  */
3993   return size + estimate_reg_pressure_cost (size, data->regs_used);
3994 }
3995
3996 /* For each size of the induction variable set determine the penalty.  */
3997
3998 static void
3999 determine_set_costs (struct ivopts_data *data)
4000 {
4001   unsigned j, n;
4002   tree phi, op;
4003   struct loop *loop = data->current_loop;
4004   bitmap_iterator bi;
4005
4006   /* We use the following model (definitely improvable, especially the
4007      cost function -- TODO):
4008
4009      We estimate the number of registers available (using MD data), name it A.
4010
4011      We estimate the number of registers used by the loop, name it U.  This
4012      number is obtained as the number of loop phi nodes (not counting virtual
4013      registers and bivs) + the number of variables from outside of the loop.
4014
4015      We set a reserve R (free regs that are used for temporary computations,
4016      etc.).  For now the reserve is a constant 3.
4017
4018      Let I be the number of induction variables.
4019
4020      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4021         make a lot of ivs without a reason).
4022      -- if A - R < U + I <= A, the cost is I * PRES_COST
4023      -- if U + I > A, the cost is I * PRES_COST and
4024         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4025
4026   if (dump_file && (dump_flags & TDF_DETAILS))
4027     {
4028       fprintf (dump_file, "Global costs:\n");
4029       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4030       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost);
4031       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4032     }
4033
4034   n = 0;
4035   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4036     {
4037       op = PHI_RESULT (phi);
4038
4039       if (!is_gimple_reg (op))
4040         continue;
4041
4042       if (get_iv (data, op))
4043         continue;
4044
4045       n++;
4046     }
4047
4048   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4049     {
4050       struct version_info *info = ver_info (data, j);
4051
4052       if (info->inv_id && info->has_nonlin_use)
4053         n++;
4054     }
4055
4056   data->regs_used = n;
4057   if (dump_file && (dump_flags & TDF_DETAILS))
4058     fprintf (dump_file, "  regs_used %d\n", n);
4059
4060   if (dump_file && (dump_flags & TDF_DETAILS))
4061     {
4062       fprintf (dump_file, "  cost for size:\n");
4063       fprintf (dump_file, "  ivs\tcost\n");
4064       for (j = 0; j <= 2 * target_avail_regs; j++)
4065         fprintf (dump_file, "  %d\t%d\n", j,
4066                  ivopts_global_cost_for_size (data, j));
4067       fprintf (dump_file, "\n");
4068     }
4069 }
4070
4071 /* Returns true if A is a cheaper cost pair than B.  */
4072
4073 static bool
4074 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4075 {
4076   int cmp;
4077
4078   if (!a)
4079     return false;
4080
4081   if (!b)
4082     return true;
4083
4084   cmp = compare_costs (a->cost, b->cost);
4085   if (cmp < 0)
4086     return true;
4087
4088   if (cmp > 0)
4089     return false;
4090
4091   /* In case the costs are the same, prefer the cheaper candidate.  */
4092   if (a->cand->cost < b->cand->cost)
4093     return true;
4094
4095   return false;
4096 }
4097
4098 /* Computes the cost field of IVS structure.  */
4099
4100 static void
4101 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4102 {
4103   comp_cost cost = ivs->cand_use_cost;
4104   cost.cost += ivs->cand_cost;
4105   cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4106
4107   ivs->cost = cost;
4108 }
4109
4110 /* Remove invariants in set INVS to set IVS.  */
4111
4112 static void
4113 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4114 {
4115   bitmap_iterator bi;
4116   unsigned iid;
4117
4118   if (!invs)
4119     return;
4120
4121   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4122     {
4123       ivs->n_invariant_uses[iid]--;
4124       if (ivs->n_invariant_uses[iid] == 0)
4125         ivs->n_regs--;
4126     }
4127 }
4128
4129 /* Set USE not to be expressed by any candidate in IVS.  */
4130
4131 static void
4132 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4133                  struct iv_use *use)
4134 {
4135   unsigned uid = use->id, cid;
4136   struct cost_pair *cp;
4137
4138   cp = ivs->cand_for_use[uid];
4139   if (!cp)
4140     return;
4141   cid = cp->cand->id;
4142
4143   ivs->bad_uses++;
4144   ivs->cand_for_use[uid] = NULL;
4145   ivs->n_cand_uses[cid]--;
4146
4147   if (ivs->n_cand_uses[cid] == 0)
4148     {
4149       bitmap_clear_bit (ivs->cands, cid);
4150       /* Do not count the pseudocandidates.  */
4151       if (cp->cand->iv)
4152         ivs->n_regs--;
4153       ivs->n_cands--;
4154       ivs->cand_cost -= cp->cand->cost;
4155
4156       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4157     }
4158
4159   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4160
4161   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4162   iv_ca_recount_cost (data, ivs);
4163 }
4164
4165 /* Add invariants in set INVS to set IVS.  */
4166
4167 static void
4168 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4169 {
4170   bitmap_iterator bi;
4171   unsigned iid;
4172
4173   if (!invs)
4174     return;
4175
4176   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4177     {
4178       ivs->n_invariant_uses[iid]++;
4179       if (ivs->n_invariant_uses[iid] == 1)
4180         ivs->n_regs++;
4181     }
4182 }
4183
4184 /* Set cost pair for USE in set IVS to CP.  */
4185
4186 static void
4187 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4188               struct iv_use *use, struct cost_pair *cp)
4189 {
4190   unsigned uid = use->id, cid;
4191
4192   if (ivs->cand_for_use[uid] == cp)
4193     return;
4194
4195   if (ivs->cand_for_use[uid])
4196     iv_ca_set_no_cp (data, ivs, use);
4197
4198   if (cp)
4199     {
4200       cid = cp->cand->id;
4201
4202       ivs->bad_uses--;
4203       ivs->cand_for_use[uid] = cp;
4204       ivs->n_cand_uses[cid]++;
4205       if (ivs->n_cand_uses[cid] == 1)
4206         {
4207           bitmap_set_bit (ivs->cands, cid);
4208           /* Do not count the pseudocandidates.  */
4209           if (cp->cand->iv)
4210             ivs->n_regs++;
4211           ivs->n_cands++;
4212           ivs->cand_cost += cp->cand->cost;
4213
4214           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4215         }
4216
4217       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4218       iv_ca_set_add_invariants (ivs, cp->depends_on);
4219       iv_ca_recount_cost (data, ivs);
4220     }
4221 }
4222
4223 /* Extend set IVS by expressing USE by some of the candidates in it
4224    if possible.  */
4225
4226 static void
4227 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4228                struct iv_use *use)
4229 {
4230   struct cost_pair *best_cp = NULL, *cp;
4231   bitmap_iterator bi;
4232   unsigned i;
4233
4234   gcc_assert (ivs->upto >= use->id);
4235
4236   if (ivs->upto == use->id)
4237     {
4238       ivs->upto++;
4239       ivs->bad_uses++;
4240     }
4241
4242   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4243     {
4244       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4245
4246       if (cheaper_cost_pair (cp, best_cp))
4247         best_cp = cp;
4248     }
4249
4250   iv_ca_set_cp (data, ivs, use, best_cp);
4251 }
4252
4253 /* Get cost for assignment IVS.  */
4254
4255 static comp_cost
4256 iv_ca_cost (struct iv_ca *ivs)
4257 {
4258   return (ivs->bad_uses ? infinite_cost : ivs->cost);
4259 }
4260
4261 /* Returns true if all dependences of CP are among invariants in IVS.  */
4262
4263 static bool
4264 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4265 {
4266   unsigned i;
4267   bitmap_iterator bi;
4268
4269   if (!cp->depends_on)
4270     return true;
4271
4272   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4273     {
4274       if (ivs->n_invariant_uses[i] == 0)
4275         return false;
4276     }
4277
4278   return true;
4279 }
4280
4281 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4282    it before NEXT_CHANGE.  */
4283
4284 static struct iv_ca_delta *
4285 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4286                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4287 {
4288   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4289
4290   change->use = use;
4291   change->old_cp = old_cp;
4292   change->new_cp = new_cp;
4293   change->next_change = next_change;
4294
4295   return change;
4296 }
4297
4298 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4299    are rewritten.  */
4300
4301 static struct iv_ca_delta *
4302 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4303 {
4304   struct iv_ca_delta *last;
4305
4306   if (!l2)
4307     return l1;
4308
4309   if (!l1)
4310     return l2;
4311
4312   for (last = l1; last->next_change; last = last->next_change)
4313     continue;
4314   last->next_change = l2;
4315
4316   return l1;
4317 }
4318
4319 /* Returns candidate by that USE is expressed in IVS.  */
4320
4321 static struct cost_pair *
4322 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4323 {
4324   return ivs->cand_for_use[use->id];
4325 }
4326
4327 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4328
4329 static struct iv_ca_delta *
4330 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4331 {
4332   struct iv_ca_delta *act, *next, *prev = NULL;
4333   struct cost_pair *tmp;
4334
4335   for (act = delta; act; act = next)
4336     {
4337       next = act->next_change;
4338       act->next_change = prev;
4339       prev = act;
4340
4341       tmp = act->old_cp;
4342       act->old_cp = act->new_cp;
4343       act->new_cp = tmp;
4344     }
4345
4346   return prev;
4347 }
4348
4349 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4350    reverted instead.  */
4351
4352 static void
4353 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4354                     struct iv_ca_delta *delta, bool forward)
4355 {
4356   struct cost_pair *from, *to;
4357   struct iv_ca_delta *act;
4358
4359   if (!forward)
4360     delta = iv_ca_delta_reverse (delta);
4361
4362   for (act = delta; act; act = act->next_change)
4363     {
4364       from = act->old_cp;
4365       to = act->new_cp;
4366       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4367       iv_ca_set_cp (data, ivs, act->use, to);
4368     }
4369
4370   if (!forward)
4371     iv_ca_delta_reverse (delta);
4372 }
4373
4374 /* Returns true if CAND is used in IVS.  */
4375
4376 static bool
4377 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4378 {
4379   return ivs->n_cand_uses[cand->id] > 0;
4380 }
4381
4382 /* Returns number of induction variable candidates in the set IVS.  */
4383
4384 static unsigned
4385 iv_ca_n_cands (struct iv_ca *ivs)
4386 {
4387   return ivs->n_cands;
4388 }
4389
4390 /* Free the list of changes DELTA.  */
4391
4392 static void
4393 iv_ca_delta_free (struct iv_ca_delta **delta)
4394 {
4395   struct iv_ca_delta *act, *next;
4396
4397   for (act = *delta; act; act = next)
4398     {
4399       next = act->next_change;
4400       free (act);
4401     }
4402
4403   *delta = NULL;
4404 }
4405
4406 /* Allocates new iv candidates assignment.  */
4407
4408 static struct iv_ca *
4409 iv_ca_new (struct ivopts_data *data)
4410 {
4411   struct iv_ca *nw = XNEW (struct iv_ca);
4412
4413   nw->upto = 0;
4414   nw->bad_uses = 0;
4415   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4416   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4417   nw->cands = BITMAP_ALLOC (NULL);
4418   nw->n_cands = 0;
4419   nw->n_regs = 0;
4420   nw->cand_use_cost = zero_cost;
4421   nw->cand_cost = 0;
4422   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4423   nw->cost = zero_cost;
4424
4425   return nw;
4426 }
4427
4428 /* Free memory occupied by the set IVS.  */
4429
4430 static void
4431 iv_ca_free (struct iv_ca **ivs)
4432 {
4433   free ((*ivs)->cand_for_use);
4434   free ((*ivs)->n_cand_uses);
4435   BITMAP_FREE ((*ivs)->cands);
4436   free ((*ivs)->n_invariant_uses);
4437   free (*ivs);
4438   *ivs = NULL;
4439 }
4440
4441 /* Dumps IVS to FILE.  */
4442
4443 static void
4444 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4445 {
4446   const char *pref = "  invariants ";
4447   unsigned i;
4448   comp_cost cost = iv_ca_cost (ivs);
4449
4450   fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
4451   bitmap_print (file, ivs->cands, "  candidates ","\n");
4452
4453   for (i = 1; i <= data->max_inv_id; i++)
4454     if (ivs->n_invariant_uses[i])
4455       {
4456         fprintf (file, "%s%d", pref, i);
4457         pref = ", ";
4458       }
4459   fprintf (file, "\n");
4460 }
4461
4462 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4463    new set, and store differences in DELTA.  Number of induction variables
4464    in the new set is stored to N_IVS.  */
4465
4466 static comp_cost
4467 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4468               struct iv_cand *cand, struct iv_ca_delta **delta,
4469               unsigned *n_ivs)
4470 {
4471   unsigned i;
4472   comp_cost cost;
4473   struct iv_use *use;
4474   struct cost_pair *old_cp, *new_cp;
4475
4476   *delta = NULL;
4477   for (i = 0; i < ivs->upto; i++)
4478     {
4479       use = iv_use (data, i);
4480       old_cp = iv_ca_cand_for_use (ivs, use);
4481
4482       if (old_cp
4483           && old_cp->cand == cand)
4484         continue;
4485
4486       new_cp = get_use_iv_cost (data, use, cand);
4487       if (!new_cp)
4488         continue;
4489
4490       if (!iv_ca_has_deps (ivs, new_cp))
4491         continue;
4492
4493       if (!cheaper_cost_pair (new_cp, old_cp))
4494         continue;
4495
4496       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4497     }
4498
4499   iv_ca_delta_commit (data, ivs, *delta, true);
4500   cost = iv_ca_cost (ivs);
4501   if (n_ivs)
4502     *n_ivs = iv_ca_n_cands (ivs);
4503   iv_ca_delta_commit (data, ivs, *delta, false);
4504
4505   return cost;
4506 }
4507
4508 /* Try narrowing set IVS by removing CAND.  Return the cost of
4509    the new set and store the differences in DELTA.  */
4510
4511 static comp_cost
4512 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4513               struct iv_cand *cand, struct iv_ca_delta **delta)
4514 {
4515   unsigned i, ci;
4516   struct iv_use *use;
4517   struct cost_pair *old_cp, *new_cp, *cp;
4518   bitmap_iterator bi;
4519   struct iv_cand *cnd;
4520   comp_cost cost;
4521
4522   *delta = NULL;
4523   for (i = 0; i < n_iv_uses (data); i++)
4524     {
4525       use = iv_use (data, i);
4526
4527       old_cp = iv_ca_cand_for_use (ivs, use);
4528       if (old_cp->cand != cand)
4529         continue;
4530
4531       new_cp = NULL;
4532
4533       if (data->consider_all_candidates)
4534         {
4535           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4536             {
4537               if (ci == cand->id)
4538                 continue;
4539
4540               cnd = iv_cand (data, ci);
4541
4542               cp = get_use_iv_cost (data, use, cnd);
4543               if (!cp)
4544                 continue;
4545               if (!iv_ca_has_deps (ivs, cp))
4546                 continue;
4547
4548               if (!cheaper_cost_pair (cp, new_cp))
4549                 continue;
4550
4551               new_cp = cp;
4552             }
4553         }
4554       else
4555         {
4556           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4557             {
4558               if (ci == cand->id)
4559                 continue;
4560
4561               cnd = iv_cand (data, ci);
4562
4563               cp = get_use_iv_cost (data, use, cnd);
4564               if (!cp)
4565                 continue;
4566               if (!iv_ca_has_deps (ivs, cp))
4567                 continue;
4568
4569               if (!cheaper_cost_pair (cp, new_cp))
4570                 continue;
4571
4572               new_cp = cp;
4573             }
4574         }
4575
4576       if (!new_cp)
4577         {
4578           iv_ca_delta_free (delta);
4579           return infinite_cost;
4580         }
4581
4582       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4583     }
4584
4585   iv_ca_delta_commit (data, ivs, *delta, true);
4586   cost = iv_ca_cost (ivs);
4587   iv_ca_delta_commit (data, ivs, *delta, false);
4588
4589   return cost;
4590 }
4591
4592 /* Try optimizing the set of candidates IVS by removing candidates different
4593    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4594    differences in DELTA.  */
4595
4596 static comp_cost
4597 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4598              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4599 {
4600   bitmap_iterator bi;
4601   struct iv_ca_delta *act_delta, *best_delta;
4602   unsigned i;
4603   comp_cost best_cost, acost;
4604   struct iv_cand *cand;
4605
4606   best_delta = NULL;
4607   best_cost = iv_ca_cost (ivs);
4608
4609   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4610     {
4611       cand = iv_cand (data, i);
4612
4613       if (cand == except_cand)
4614         continue;
4615
4616       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4617
4618       if (compare_costs (acost, best_cost) < 0)
4619         {
4620           best_cost = acost;
4621           iv_ca_delta_free (&best_delta);
4622           best_delta = act_delta;
4623         }
4624       else
4625         iv_ca_delta_free (&act_delta);
4626     }
4627
4628   if (!best_delta)
4629     {
4630       *delta = NULL;
4631       return best_cost;
4632     }
4633
4634   /* Recurse to possibly remove other unnecessary ivs.  */
4635   iv_ca_delta_commit (data, ivs, best_delta, true);
4636   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4637   iv_ca_delta_commit (data, ivs, best_delta, false);
4638   *delta = iv_ca_delta_join (best_delta, *delta);
4639   return best_cost;
4640 }
4641
4642 /* Tries to extend the sets IVS in the best possible way in order
4643    to express the USE.  */
4644
4645 static bool
4646 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4647                   struct iv_use *use)
4648 {
4649   comp_cost best_cost, act_cost;
4650   unsigned i;
4651   bitmap_iterator bi;
4652   struct iv_cand *cand;
4653   struct iv_ca_delta *best_delta = NULL, *act_delta;
4654   struct cost_pair *cp;
4655
4656   iv_ca_add_use (data, ivs, use);
4657   best_cost = iv_ca_cost (ivs);
4658
4659   cp = iv_ca_cand_for_use (ivs, use);
4660   if (cp)
4661     {
4662       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4663       iv_ca_set_no_cp (data, ivs, use);
4664     }
4665
4666   /* First try important candidates not based on any memory object.  Only if
4667      this fails, try the specific ones.  Rationale -- in loops with many
4668      variables the best choice often is to use just one generic biv.  If we
4669      added here many ivs specific to the uses, the optimization algorithm later
4670      would be likely to get stuck in a local minimum, thus causing us to create
4671      too many ivs.  The approach from few ivs to more seems more likely to be
4672      successful -- starting from few ivs, replacing an expensive use by a
4673      specific iv should always be a win.  */
4674   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4675     {
4676       cand = iv_cand (data, i);
4677
4678       if (cand->iv->base_object != NULL_TREE)
4679         continue;
4680
4681       if (iv_ca_cand_used_p (ivs, cand))
4682         continue;
4683
4684       cp = get_use_iv_cost (data, use, cand);
4685       if (!cp)
4686         continue;
4687
4688       iv_ca_set_cp (data, ivs, use, cp);
4689       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4690       iv_ca_set_no_cp (data, ivs, use);
4691       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4692
4693       if (compare_costs (act_cost, best_cost) < 0)
4694         {
4695           best_cost = act_cost;
4696
4697           iv_ca_delta_free (&best_delta);
4698           best_delta = act_delta;
4699         }
4700       else
4701         iv_ca_delta_free (&act_delta);
4702     }
4703
4704   if (infinite_cost_p (best_cost))
4705     {
4706       for (i = 0; i < use->n_map_members; i++)
4707         {
4708           cp = use->cost_map + i;
4709           cand = cp->cand;
4710           if (!cand)
4711             continue;
4712
4713           /* Already tried this.  */
4714           if (cand->important && cand->iv->base_object == NULL_TREE)
4715             continue;
4716
4717           if (iv_ca_cand_used_p (ivs, cand))
4718             continue;
4719
4720           act_delta = NULL;
4721           iv_ca_set_cp (data, ivs, use, cp);
4722           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4723           iv_ca_set_no_cp (data, ivs, use);
4724           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4725                                        cp, act_delta);
4726
4727           if (compare_costs (act_cost, best_cost) < 0)
4728             {
4729               best_cost = act_cost;
4730
4731               if (best_delta)
4732                 iv_ca_delta_free (&best_delta);
4733               best_delta = act_delta;
4734             }
4735           else
4736             iv_ca_delta_free (&act_delta);
4737         }
4738     }
4739
4740   iv_ca_delta_commit (data, ivs, best_delta, true);
4741   iv_ca_delta_free (&best_delta);
4742
4743   return !infinite_cost_p (best_cost);
4744 }
4745
4746 /* Finds an initial assignment of candidates to uses.  */
4747
4748 static struct iv_ca *
4749 get_initial_solution (struct ivopts_data *data)
4750 {
4751   struct iv_ca *ivs = iv_ca_new (data);
4752   unsigned i;
4753
4754   for (i = 0; i < n_iv_uses (data); i++)
4755     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4756       {
4757         iv_ca_free (&ivs);
4758         return NULL;
4759       }
4760
4761   return ivs;
4762 }
4763
4764 /* Tries to improve set of induction variables IVS.  */
4765
4766 static bool
4767 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4768 {
4769   unsigned i, n_ivs;
4770   comp_cost acost, best_cost = iv_ca_cost (ivs);
4771   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4772   struct iv_cand *cand;
4773
4774   /* Try extending the set of induction variables by one.  */
4775   for (i = 0; i < n_iv_cands (data); i++)
4776     {
4777       cand = iv_cand (data, i);
4778
4779       if (iv_ca_cand_used_p (ivs, cand))
4780         continue;
4781
4782       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4783       if (!act_delta)
4784         continue;
4785
4786       /* If we successfully added the candidate and the set is small enough,
4787          try optimizing it by removing other candidates.  */
4788       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4789         {
4790           iv_ca_delta_commit (data, ivs, act_delta, true);
4791           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4792           iv_ca_delta_commit (data, ivs, act_delta, false);
4793           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4794         }
4795
4796       if (compare_costs (acost, best_cost) < 0)
4797         {
4798           best_cost = acost;
4799           iv_ca_delta_free (&best_delta);
4800           best_delta = act_delta;
4801         }
4802       else
4803         iv_ca_delta_free (&act_delta);
4804     }
4805
4806   if (!best_delta)
4807     {
4808       /* Try removing the candidates from the set instead.  */
4809       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4810
4811       /* Nothing more we can do.  */
4812       if (!best_delta)
4813         return false;
4814     }
4815
4816   iv_ca_delta_commit (data, ivs, best_delta, true);
4817   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4818   iv_ca_delta_free (&best_delta);
4819   return true;
4820 }
4821
4822 /* Attempts to find the optimal set of induction variables.  We do simple
4823    greedy heuristic -- we try to replace at most one candidate in the selected
4824    solution and remove the unused ivs while this improves the cost.  */
4825
4826 static struct iv_ca *
4827 find_optimal_iv_set (struct ivopts_data *data)
4828 {
4829   unsigned i;
4830   struct iv_ca *set;
4831   struct iv_use *use;
4832
4833   /* Get the initial solution.  */
4834   set = get_initial_solution (data);
4835   if (!set)
4836     {
4837       if (dump_file && (dump_flags & TDF_DETAILS))
4838         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4839       return NULL;
4840     }
4841
4842   if (dump_file && (dump_flags & TDF_DETAILS))
4843     {
4844       fprintf (dump_file, "Initial set of candidates:\n");
4845       iv_ca_dump (data, dump_file, set);
4846     }
4847
4848   while (try_improve_iv_set (data, set))
4849     {
4850       if (dump_file && (dump_flags & TDF_DETAILS))
4851         {
4852           fprintf (dump_file, "Improved to:\n");
4853           iv_ca_dump (data, dump_file, set);
4854         }
4855     }
4856
4857   if (dump_file && (dump_flags & TDF_DETAILS))
4858     {
4859       comp_cost cost = iv_ca_cost (set);
4860       fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4861     }
4862
4863   for (i = 0; i < n_iv_uses (data); i++)
4864     {
4865       use = iv_use (data, i);
4866       use->selected = iv_ca_cand_for_use (set, use)->cand;
4867     }
4868
4869   return set;
4870 }
4871
4872 /* Creates a new induction variable corresponding to CAND.  */
4873
4874 static void
4875 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4876 {
4877   block_stmt_iterator incr_pos;
4878   tree base;
4879   bool after = false;
4880
4881   if (!cand->iv)
4882     return;
4883
4884   switch (cand->pos)
4885     {
4886     case IP_NORMAL:
4887       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4888       break;
4889
4890     case IP_END:
4891       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4892       after = true;
4893       break;
4894
4895     case IP_ORIGINAL:
4896       /* Mark that the iv is preserved.  */
4897       name_info (data, cand->var_before)->preserve_biv = true;
4898       name_info (data, cand->var_after)->preserve_biv = true;
4899
4900       /* Rewrite the increment so that it uses var_before directly.  */
4901       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4902
4903       return;
4904     }
4905
4906   gimple_add_tmp_var (cand->var_before);
4907   add_referenced_var (cand->var_before);
4908
4909   base = unshare_expr (cand->iv->base);
4910
4911   create_iv (base, unshare_expr (cand->iv->step),
4912              cand->var_before, data->current_loop,
4913              &incr_pos, after, &cand->var_before, &cand->var_after);
4914 }
4915
4916 /* Creates new induction variables described in SET.  */
4917
4918 static void
4919 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4920 {
4921   unsigned i;
4922   struct iv_cand *cand;
4923   bitmap_iterator bi;
4924
4925   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4926     {
4927       cand = iv_cand (data, i);
4928       create_new_iv (data, cand);
4929     }
4930 }
4931
4932 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4933    is true, remove also the ssa name defined by the statement.  */
4934
4935 static void
4936 remove_statement (tree stmt, bool including_defined_name)
4937 {
4938   if (TREE_CODE (stmt) == PHI_NODE)
4939     {
4940       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4941     }
4942   else
4943     {
4944       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4945
4946       bsi_remove (&bsi, true);
4947       release_defs (stmt);
4948     }
4949 }
4950
4951 /* Rewrites USE (definition of iv used in a nonlinear expression)
4952    using candidate CAND.  */
4953
4954 static void
4955 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4956                             struct iv_use *use, struct iv_cand *cand)
4957 {
4958   tree comp;
4959   tree op, tgt, ass;
4960   block_stmt_iterator bsi;
4961
4962   /* An important special case -- if we are asked to express value of
4963      the original iv by itself, just exit; there is no need to
4964      introduce a new computation (that might also need casting the
4965      variable to unsigned and back).  */
4966   if (cand->pos == IP_ORIGINAL
4967       && cand->incremented_at == use->stmt)
4968     {
4969       tree step, ctype, utype;
4970       enum tree_code incr_code = PLUS_EXPR;
4971
4972       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4973       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4974
4975       step = cand->iv->step;
4976       ctype = TREE_TYPE (step);
4977       utype = TREE_TYPE (cand->var_after);
4978       if (TREE_CODE (step) == NEGATE_EXPR)
4979         {
4980           incr_code = MINUS_EXPR;
4981           step = TREE_OPERAND (step, 0);
4982         }
4983
4984       /* Check whether we may leave the computation unchanged.
4985          This is the case only if it does not rely on other
4986          computations in the loop -- otherwise, the computation
4987          we rely upon may be removed in remove_unused_ivs,
4988          thus leading to ICE.  */
4989       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
4990       if (TREE_CODE (op) == PLUS_EXPR
4991           || TREE_CODE (op) == MINUS_EXPR
4992           || TREE_CODE (op) == POINTER_PLUS_EXPR)
4993         {
4994           if (TREE_OPERAND (op, 0) == cand->var_before)
4995             op = TREE_OPERAND (op, 1);
4996           else if (TREE_CODE (op) != MINUS_EXPR
4997                    && TREE_OPERAND (op, 1) == cand->var_before)
4998             op = TREE_OPERAND (op, 0);
4999           else
5000             op = NULL_TREE;
5001         }
5002       else
5003         op = NULL_TREE;
5004
5005       if (op
5006           && (TREE_CODE (op) == INTEGER_CST
5007               || operand_equal_p (op, step, 0)))
5008         return;
5009
5010       /* Otherwise, add the necessary computations to express
5011          the iv.  */
5012       op = fold_convert (ctype, cand->var_before);
5013       comp = fold_convert (utype,
5014                            build2 (incr_code, ctype, op,
5015                                    unshare_expr (step)));
5016     }
5017   else
5018     {
5019       comp = get_computation (data->current_loop, use, cand);
5020       gcc_assert (comp != NULL_TREE);
5021     }
5022
5023   switch (TREE_CODE (use->stmt))
5024     {
5025     case PHI_NODE:
5026       tgt = PHI_RESULT (use->stmt);
5027
5028       /* If we should keep the biv, do not replace it.  */
5029       if (name_info (data, tgt)->preserve_biv)
5030         return;
5031
5032       bsi = bsi_after_labels (bb_for_stmt (use->stmt));
5033       break;
5034
5035     case GIMPLE_MODIFY_STMT:
5036       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
5037       bsi = bsi_for_stmt (use->stmt);
5038       break;
5039
5040     default:
5041       gcc_unreachable ();
5042     }
5043
5044   op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5045                                  true, BSI_SAME_STMT);
5046
5047   if (TREE_CODE (use->stmt) == PHI_NODE)
5048     {
5049       ass = build_gimple_modify_stmt (tgt, op);
5050       bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
5051       remove_statement (use->stmt, false);
5052       SSA_NAME_DEF_STMT (tgt) = ass;
5053     }
5054   else
5055     GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
5056 }
5057
5058 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5059    for_each_index.  */
5060
5061 static bool
5062 idx_remove_ssa_names (tree base, tree *idx,
5063                       void *data ATTRIBUTE_UNUSED)
5064 {
5065   tree *op;
5066
5067   if (TREE_CODE (*idx) == SSA_NAME)
5068     *idx = SSA_NAME_VAR (*idx);
5069
5070   if (TREE_CODE (base) == ARRAY_REF)
5071     {
5072       op = &TREE_OPERAND (base, 2);
5073       if (*op
5074           && TREE_CODE (*op) == SSA_NAME)
5075         *op = SSA_NAME_VAR (*op);
5076       op = &TREE_OPERAND (base, 3);
5077       if (*op
5078           && TREE_CODE (*op) == SSA_NAME)
5079         *op = SSA_NAME_VAR (*op);
5080     }
5081
5082   return true;
5083 }
5084
5085 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5086
5087 static tree
5088 unshare_and_remove_ssa_names (tree ref)
5089 {
5090   ref = unshare_expr (ref);
5091   for_each_index (&ref, idx_remove_ssa_names, NULL);
5092
5093   return ref;
5094 }
5095
5096 /* Extract the alias analysis info for the memory reference REF.  There are
5097    several ways how this information may be stored and what precisely is
5098    its semantics depending on the type of the reference, but there always is
5099    somewhere hidden one _DECL node that is used to determine the set of
5100    virtual operands for the reference.  The code below deciphers this jungle
5101    and extracts this single useful piece of information.  */
5102
5103 static tree
5104 get_ref_tag (tree ref, tree orig)
5105 {
5106   tree var = get_base_address (ref);
5107   tree aref = NULL_TREE, tag, sv;
5108   HOST_WIDE_INT offset, size, maxsize;
5109
5110   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5111     {
5112       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5113       if (ref)
5114         break;
5115     }
5116
5117   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5118     return aref;
5119
5120   if (!var)
5121     return NULL_TREE;
5122
5123   if (TREE_CODE (var) == INDIRECT_REF)
5124     {
5125       /* If the base is a dereference of a pointer, first check its name memory
5126          tag.  If it does not have one, use its symbol memory tag.  */
5127       var = TREE_OPERAND (var, 0);
5128       if (TREE_CODE (var) != SSA_NAME)
5129         return NULL_TREE;
5130
5131       if (SSA_NAME_PTR_INFO (var))
5132         {
5133           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5134           if (tag)
5135             return tag;
5136         }
5137
5138       var = SSA_NAME_VAR (var);
5139       tag = symbol_mem_tag (var);
5140       gcc_assert (tag != NULL_TREE);
5141       return tag;
5142     }
5143   else
5144     {
5145       if (!DECL_P (var))
5146         return NULL_TREE;
5147
5148       tag = symbol_mem_tag (var);
5149       if (tag)
5150         return tag;
5151
5152       return var;
5153     }
5154 }
5155
5156 /* Copies the reference information from OLD_REF to NEW_REF.  */
5157
5158 static void
5159 copy_ref_info (tree new_ref, tree old_ref)
5160 {
5161   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5162     copy_mem_ref_info (new_ref, old_ref);
5163   else
5164     {
5165       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5166       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5167     }
5168 }
5169
5170 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5171
5172 static void
5173 rewrite_use_address (struct ivopts_data *data,
5174                      struct iv_use *use, struct iv_cand *cand)
5175 {
5176   aff_tree aff;
5177   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5178   tree ref;
5179   bool ok;
5180
5181   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5182   gcc_assert (ok);
5183   unshare_aff_combination (&aff);
5184
5185   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5186   copy_ref_info (ref, *use->op_p);
5187   *use->op_p = ref;
5188 }
5189
5190 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5191    candidate CAND.  */
5192
5193 static void
5194 rewrite_use_compare (struct ivopts_data *data,
5195                      struct iv_use *use, struct iv_cand *cand)
5196 {
5197   tree comp, *var_p, op, bound;
5198   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5199   enum tree_code compare;
5200   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5201   bool ok;
5202
5203   bound = cp->value;
5204   if (bound)
5205     {
5206       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5207       tree var_type = TREE_TYPE (var);
5208
5209       compare = iv_elimination_compare (data, use);
5210       bound = unshare_expr (fold_convert (var_type, bound));
5211       op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
5212                                      true, BSI_SAME_STMT);
5213
5214       *use->op_p = build2 (compare, boolean_type_node, var, op);
5215       return;
5216     }
5217
5218   /* The induction variable elimination failed; just express the original
5219      giv.  */
5220   comp = get_computation (data->current_loop, use, cand);
5221   gcc_assert (comp != NULL_TREE);
5222
5223   ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5224   gcc_assert (ok);
5225
5226   *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5227                                      true, BSI_SAME_STMT);
5228 }
5229
5230 /* Rewrites USE using candidate CAND.  */
5231
5232 static void
5233 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5234 {
5235   push_stmt_changes (&use->stmt);
5236
5237   switch (use->type)
5238     {
5239       case USE_NONLINEAR_EXPR:
5240         rewrite_use_nonlinear_expr (data, use, cand);
5241         break;
5242
5243       case USE_ADDRESS:
5244         rewrite_use_address (data, use, cand);
5245         break;
5246
5247       case USE_COMPARE:
5248         rewrite_use_compare (data, use, cand);
5249         break;
5250
5251       default:
5252         gcc_unreachable ();
5253     }
5254
5255   pop_stmt_changes (&use->stmt);
5256 }
5257
5258 /* Rewrite the uses using the selected induction variables.  */
5259
5260 static void
5261 rewrite_uses (struct ivopts_data *data)
5262 {
5263   unsigned i;
5264   struct iv_cand *cand;
5265   struct iv_use *use;
5266
5267   for (i = 0; i < n_iv_uses (data); i++)
5268     {
5269       use = iv_use (data, i);
5270       cand = use->selected;
5271       gcc_assert (cand);
5272
5273       rewrite_use (data, use, cand);
5274     }
5275 }
5276
5277 /* Removes the ivs that are not used after rewriting.  */
5278
5279 static void
5280 remove_unused_ivs (struct ivopts_data *data)
5281 {
5282   unsigned j;
5283   bitmap_iterator bi;
5284
5285   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5286     {
5287       struct version_info *info;
5288
5289       info = ver_info (data, j);
5290       if (info->iv
5291           && !integer_zerop (info->iv->step)
5292           && !info->inv_id
5293           && !info->iv->have_use_for
5294           && !info->preserve_biv)
5295         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5296     }
5297 }
5298
5299 /* Frees data allocated by the optimization of a single loop.  */
5300
5301 static void
5302 free_loop_data (struct ivopts_data *data)
5303 {
5304   unsigned i, j;
5305   bitmap_iterator bi;
5306   tree obj;
5307
5308   if (data->niters)
5309     {
5310       pointer_map_destroy (data->niters);
5311       data->niters = NULL;
5312     }
5313
5314   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5315     {
5316       struct version_info *info;
5317
5318       info = ver_info (data, i);
5319       if (info->iv)
5320         free (info->iv);
5321       info->iv = NULL;
5322       info->has_nonlin_use = false;
5323       info->preserve_biv = false;
5324       info->inv_id = 0;
5325     }
5326   bitmap_clear (data->relevant);
5327   bitmap_clear (data->important_candidates);
5328
5329   for (i = 0; i < n_iv_uses (data); i++)
5330     {
5331       struct iv_use *use = iv_use (data, i);
5332
5333       free (use->iv);
5334       BITMAP_FREE (use->related_cands);
5335       for (j = 0; j < use->n_map_members; j++)
5336         if (use->cost_map[j].depends_on)
5337           BITMAP_FREE (use->cost_map[j].depends_on);
5338       free (use->cost_map);
5339       free (use);
5340     }
5341   VEC_truncate (iv_use_p, data->iv_uses, 0);
5342
5343   for (i = 0; i < n_iv_cands (data); i++)
5344     {
5345       struct iv_cand *cand = iv_cand (data, i);
5346
5347       if (cand->iv)
5348         free (cand->iv);
5349       if (cand->depends_on)
5350         BITMAP_FREE (cand->depends_on);
5351       free (cand);
5352     }
5353   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5354
5355   if (data->version_info_size < num_ssa_names)
5356     {
5357       data->version_info_size = 2 * num_ssa_names;
5358       free (data->version_info);
5359       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5360     }
5361
5362   data->max_inv_id = 0;
5363
5364   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5365     SET_DECL_RTL (obj, NULL_RTX);
5366
5367   VEC_truncate (tree, decl_rtl_to_reset, 0);
5368 }
5369
5370 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5371    loop tree.  */
5372
5373 static void
5374 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5375 {
5376   free_loop_data (data);
5377   free (data->version_info);
5378   BITMAP_FREE (data->relevant);
5379   BITMAP_FREE (data->important_candidates);
5380
5381   VEC_free (tree, heap, decl_rtl_to_reset);
5382   VEC_free (iv_use_p, heap, data->iv_uses);
5383   VEC_free (iv_cand_p, heap, data->iv_candidates);
5384 }
5385
5386 /* Optimizes the LOOP.  Returns true if anything changed.  */
5387
5388 static bool
5389 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5390 {
5391   bool changed = false;
5392   struct iv_ca *iv_ca;
5393   edge exit;
5394
5395   gcc_assert (!data->niters);
5396   data->current_loop = loop;
5397
5398   if (dump_file && (dump_flags & TDF_DETAILS))
5399     {
5400       fprintf (dump_file, "Processing loop %d\n", loop->num);
5401
5402       exit = single_dom_exit (loop);
5403       if (exit)
5404         {
5405           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5406                    exit->src->index, exit->dest->index);
5407           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5408           fprintf (dump_file, "\n");
5409         }
5410
5411       fprintf (dump_file, "\n");
5412     }
5413
5414   /* For each ssa name determines whether it behaves as an induction variable
5415      in some loop.  */
5416   if (!find_induction_variables (data))
5417     goto finish;
5418
5419   /* Finds interesting uses (item 1).  */
5420   find_interesting_uses (data);
5421   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5422     goto finish;
5423
5424   /* Finds candidates for the induction variables (item 2).  */
5425   find_iv_candidates (data);
5426
5427   /* Calculates the costs (item 3, part 1).  */
5428   determine_use_iv_costs (data);
5429   determine_iv_costs (data);
5430   determine_set_costs (data);
5431
5432   /* Find the optimal set of induction variables (item 3, part 2).  */
5433   iv_ca = find_optimal_iv_set (data);
5434   if (!iv_ca)
5435     goto finish;
5436   changed = true;
5437
5438   /* Create the new induction variables (item 4, part 1).  */
5439   create_new_ivs (data, iv_ca);
5440   iv_ca_free (&iv_ca);
5441
5442   /* Rewrite the uses (item 4, part 2).  */
5443   rewrite_uses (data);
5444
5445   /* Remove the ivs that are unused after rewriting.  */
5446   remove_unused_ivs (data);
5447
5448   /* We have changed the structure of induction variables; it might happen
5449      that definitions in the scev database refer to some of them that were
5450      eliminated.  */
5451   scev_reset ();
5452
5453 finish:
5454   free_loop_data (data);
5455
5456   return changed;
5457 }
5458
5459 /* Main entry point.  Optimizes induction variables in loops.  */
5460
5461 void
5462 tree_ssa_iv_optimize (void)
5463 {
5464   struct loop *loop;
5465   struct ivopts_data data;
5466   loop_iterator li;
5467
5468   tree_ssa_iv_optimize_init (&data);
5469
5470   /* Optimize the loops starting with the innermost ones.  */
5471   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5472     {
5473       if (dump_file && (dump_flags & TDF_DETAILS))
5474         flow_loop_dump (loop, dump_file, NULL, 1);
5475
5476       tree_ssa_iv_optimize_loop (&data, loop);
5477     }
5478
5479   tree_ssa_iv_optimize_finalize (&data);
5480 }