gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "rtl.h"
  70 #include "tm_p.h"
  71 #include "hard-reg-set.h"
  72 #include "basic-block.h"
  73 #include "output.h"
  74 #include "diagnostic.h"
  75 #include "tree-flow.h"
  76 #include "tree-dump.h"
  77 #include "timevar.h"
  78 #include "cfgloop.h"
  79 #include "varray.h"
  80 #include "expr.h"
  81 #include "tree-pass.h"
  82 #include "ggc.h"
  83 #include "insn-config.h"
  84 #include "recog.h"
  85 #include "pointer-set.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92 #include "tree-affine.h"
  93 #include "target.h"
  94
  95 /* The infinite cost.  */
  96 #define INFTY 10000000
  97
  98 /* The expected number of loop iterations.  TODO -- use profiling instead of
  99    this.  */
 100 #define AVG_LOOP_NITER(LOOP) 5
 101
 102
 103 /* Representation of the induction variable.  */
 104 struct iv
 105 {
 106   tree base;            /* Initial value of the iv.  */
 107   tree base_object;     /* A memory object to that the induction variable points.  */
 108   tree step;            /* Step of the iv (constant only).  */
 109   tree ssa_name;        /* The ssa name with the value.  */
 110   bool biv_p;           /* Is it a biv?  */
 111   bool have_use_for;    /* Do we already have a use for it?  */
 112   unsigned use_id;      /* The identifier in the use if it is the case.  */
 113 };
 114
 115 /* Per-ssa version information (induction variable descriptions, etc.).  */
 116 struct version_info
 117 {
 118   tree name;            /* The ssa name.  */
 119   struct iv *iv;        /* Induction variable description.  */
 120   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 121                            an expression that is not an induction variable.  */
 122   unsigned inv_id;      /* Id of an invariant.  */
 123   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 124 };
 125
 126 /* Types of uses.  */
 127 enum use_type
 128 {
 129   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 130   USE_ADDRESS,          /* Use in an address.  */
 131   USE_COMPARE           /* Use is a compare.  */
 132 };
 133
 134 /* Cost of a computation.  */
 135 typedef struct
 136 {
 137   unsigned cost;        /* The runtime cost.  */
 138   unsigned complexity;  /* The estimate of the complexity of the code for
 139                            the computation (in no concrete units --
 140                            complexity field should be larger for more
 141                            complex expressions and addressing modes).  */
 142 } comp_cost;
 143
 144 static const comp_cost zero_cost = {0, 0};
 145 static const comp_cost infinite_cost = {INFTY, INFTY};
 146
 147 /* The candidate - cost pair.  */
 148 struct cost_pair
 149 {
 150   struct iv_cand *cand; /* The candidate.  */
 151   comp_cost cost;       /* The cost.  */
 152   bitmap depends_on;    /* The list of invariants that have to be
 153                            preserved.  */
 154   tree value;           /* For final value elimination, the expression for
 155                            the final value of the iv.  For iv elimination,
 156                            the new bound to compare with.  */
 157 };
 158
 159 /* Use.  */
 160 struct iv_use
 161 {
 162   unsigned id;          /* The id of the use.  */
 163   enum use_type type;   /* Type of the use.  */
 164   struct iv *iv;        /* The induction variable it is based on.  */
 165   tree stmt;            /* Statement in that it occurs.  */
 166   tree *op_p;           /* The place where it occurs.  */
 167   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 168                            important ones.  */
 169
 170   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 171   struct cost_pair *cost_map;
 172                         /* The costs wrto the iv candidates.  */
 173
 174   struct iv_cand *selected;
 175                         /* The selected candidate.  */
 176 };
 177
 178 /* The position where the iv is computed.  */
 179 enum iv_position
 180 {
 181   IP_NORMAL,            /* At the end, just before the exit condition.  */
 182   IP_END,               /* At the end of the latch block.  */
 183   IP_ORIGINAL           /* The original biv.  */
 184 };
 185
 186 /* The induction variable candidate.  */
 187 struct iv_cand
 188 {
 189   unsigned id;          /* The number of the candidate.  */
 190   bool important;       /* Whether this is an "important" candidate, i.e. such
 191                            that it should be considered by all uses.  */
 192   enum iv_position pos; /* Where it is computed.  */
 193   tree incremented_at;  /* For original biv, the statement where it is
 194                            incremented.  */
 195   tree var_before;      /* The variable used for it before increment.  */
 196   tree var_after;       /* The variable used for it after increment.  */
 197   struct iv *iv;        /* The value of the candidate.  NULL for
 198                            "pseudocandidate" used to indicate the possibility
 199                            to replace the final value of an iv by direct
 200                            computation of the value.  */
 201   unsigned cost;        /* Cost of the candidate.  */
 202   bitmap depends_on;    /* The list of invariants that are used in step of the
 203                            biv.  */
 204 };
 205
 206 /* The data used by the induction variable optimizations.  */
 207
 208 typedef struct iv_use *iv_use_p;
 209 DEF_VEC_P(iv_use_p);
 210 DEF_VEC_ALLOC_P(iv_use_p,heap);
 211
 212 typedef struct iv_cand *iv_cand_p;
 213 DEF_VEC_P(iv_cand_p);
 214 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 215
 216 struct ivopts_data
 217 {
 218   /* The currently optimized loop.  */
 219   struct loop *current_loop;
 220
 221   /* Number of registers used in it.  */
 222   unsigned regs_used;
 223
 224   /* Numbers of iterations for all exits of the current loop.  */
 225   struct pointer_map_t *niters;
 226
 227   /* The size of version_info array allocated.  */
 228   unsigned version_info_size;
 229
 230   /* The array of information for the ssa names.  */
 231   struct version_info *version_info;
 232
 233   /* The bitmap of indices in version_info whose value was changed.  */
 234   bitmap relevant;
 235
 236   /* The maximum invariant id.  */
 237   unsigned max_inv_id;
 238
 239   /* The uses of induction variables.  */
 240   VEC(iv_use_p,heap) *iv_uses;
 241
 242   /* The candidates.  */
 243   VEC(iv_cand_p,heap) *iv_candidates;
 244
 245   /* A bitmap of important candidates.  */
 246   bitmap important_candidates;
 247
 248   /* Whether to consider just related and important candidates when replacing a
 249      use.  */
 250   bool consider_all_candidates;
 251 };
 252
 253 /* An assignment of iv candidates to uses.  */
 254
 255 struct iv_ca
 256 {
 257   /* The number of uses covered by the assignment.  */
 258   unsigned upto;
 259
 260   /* Number of uses that cannot be expressed by the candidates in the set.  */
 261   unsigned bad_uses;
 262
 263   /* Candidate assigned to a use, together with the related costs.  */
 264   struct cost_pair **cand_for_use;
 265
 266   /* Number of times each candidate is used.  */
 267   unsigned *n_cand_uses;
 268
 269   /* The candidates used.  */
 270   bitmap cands;
 271
 272   /* The number of candidates in the set.  */
 273   unsigned n_cands;
 274
 275   /* Total number of registers needed.  */
 276   unsigned n_regs;
 277
 278   /* Total cost of expressing uses.  */
 279   comp_cost cand_use_cost;
 280
 281   /* Total cost of candidates.  */
 282   unsigned cand_cost;
 283
 284   /* Number of times each invariant is used.  */
 285   unsigned *n_invariant_uses;
 286
 287   /* Total cost of the assignment.  */
 288   comp_cost cost;
 289 };
 290
 291 /* Difference of two iv candidate assignments.  */
 292
 293 struct iv_ca_delta
 294 {
 295   /* Changed use.  */
 296   struct iv_use *use;
 297
 298   /* An old assignment (for rollback purposes).  */
 299   struct cost_pair *old_cp;
 300
 301   /* A new assignment.  */
 302   struct cost_pair *new_cp;
 303
 304   /* Next change in the list.  */
 305   struct iv_ca_delta *next_change;
 306 };
 307
 308 /* Bound on number of candidates below that all candidates are considered.  */
 309
 310 #define CONSIDER_ALL_CANDIDATES_BOUND \
 311   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 312
 313 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 314    optimizing such a loop would help, and it would take ages).  */
 315
 316 #define MAX_CONSIDERED_USES \
 317   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 318
 319 /* If there are at most this number of ivs in the set, try removing unnecessary
 320    ivs from the set always.  */
 321
 322 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 323   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 324
 325 /* The list of trees for that the decl_rtl field must be reset is stored
 326    here.  */
 327
 328 static VEC(tree,heap) *decl_rtl_to_reset;
 329
 330 /* Number of uses recorded in DATA.  */
 331
 332 static inline unsigned
 333 n_iv_uses (struct ivopts_data *data)
 334 {
 335   return VEC_length (iv_use_p, data->iv_uses);
 336 }
 337
 338 /* Ith use recorded in DATA.  */
 339
 340 static inline struct iv_use *
 341 iv_use (struct ivopts_data *data, unsigned i)
 342 {
 343   return VEC_index (iv_use_p, data->iv_uses, i);
 344 }
 345
 346 /* Number of candidates recorded in DATA.  */
 347
 348 static inline unsigned
 349 n_iv_cands (struct ivopts_data *data)
 350 {
 351   return VEC_length (iv_cand_p, data->iv_candidates);
 352 }
 353
 354 /* Ith candidate recorded in DATA.  */
 355
 356 static inline struct iv_cand *
 357 iv_cand (struct ivopts_data *data, unsigned i)
 358 {
 359   return VEC_index (iv_cand_p, data->iv_candidates, i);
 360 }
 361
 362 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 363
 364 edge
 365 single_dom_exit (struct loop *loop)
 366 {
 367   edge exit = single_exit (loop);
 368
 369   if (!exit)
 370     return NULL;
 371
 372   if (!just_once_each_iteration_p (loop, exit->src))
 373     return NULL;
 374
 375   return exit;
 376 }
 377
 378 /* Dumps information about the induction variable IV to FILE.  */
 379
 380 extern void dump_iv (FILE *, struct iv *);
 381 void
 382 dump_iv (FILE *file, struct iv *iv)
 383 {
 384   if (iv->ssa_name)
 385     {
 386       fprintf (file, "ssa name ");
 387       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 388       fprintf (file, "\n");
 389     }
 390
 391   fprintf (file, "  type ");
 392   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 393   fprintf (file, "\n");
 394
 395   if (iv->step)
 396     {
 397       fprintf (file, "  base ");
 398       print_generic_expr (file, iv->base, TDF_SLIM);
 399       fprintf (file, "\n");
 400
 401       fprintf (file, "  step ");
 402       print_generic_expr (file, iv->step, TDF_SLIM);
 403       fprintf (file, "\n");
 404     }
 405   else
 406     {
 407       fprintf (file, "  invariant ");
 408       print_generic_expr (file, iv->base, TDF_SLIM);
 409       fprintf (file, "\n");
 410     }
 411
 412   if (iv->base_object)
 413     {
 414       fprintf (file, "  base object ");
 415       print_generic_expr (file, iv->base_object, TDF_SLIM);
 416       fprintf (file, "\n");
 417     }
 418
 419   if (iv->biv_p)
 420     fprintf (file, "  is a biv\n");
 421 }
 422
 423 /* Dumps information about the USE to FILE.  */
 424
 425 extern void dump_use (FILE *, struct iv_use *);
 426 void
 427 dump_use (FILE *file, struct iv_use *use)
 428 {
 429   fprintf (file, "use %d\n", use->id);
 430
 431   switch (use->type)
 432     {
 433     case USE_NONLINEAR_EXPR:
 434       fprintf (file, "  generic\n");
 435       break;
 436
 437     case USE_ADDRESS:
 438       fprintf (file, "  address\n");
 439       break;
 440
 441     case USE_COMPARE:
 442       fprintf (file, "  compare\n");
 443       break;
 444
 445     default:
 446       gcc_unreachable ();
 447     }
 448
 449   fprintf (file, "  in statement ");
 450   print_generic_expr (file, use->stmt, TDF_SLIM);
 451   fprintf (file, "\n");
 452
 453   fprintf (file, "  at position ");
 454   if (use->op_p)
 455     print_generic_expr (file, *use->op_p, TDF_SLIM);
 456   fprintf (file, "\n");
 457
 458   dump_iv (file, use->iv);
 459
 460   if (use->related_cands)
 461     {
 462       fprintf (file, "  related candidates ");
 463       dump_bitmap (file, use->related_cands);
 464     }
 465 }
 466
 467 /* Dumps information about the uses to FILE.  */
 468
 469 extern void dump_uses (FILE *, struct ivopts_data *);
 470 void
 471 dump_uses (FILE *file, struct ivopts_data *data)
 472 {
 473   unsigned i;
 474   struct iv_use *use;
 475
 476   for (i = 0; i < n_iv_uses (data); i++)
 477     {
 478       use = iv_use (data, i);
 479
 480       dump_use (file, use);
 481       fprintf (file, "\n");
 482     }
 483 }
 484
 485 /* Dumps information about induction variable candidate CAND to FILE.  */
 486
 487 extern void dump_cand (FILE *, struct iv_cand *);
 488 void
 489 dump_cand (FILE *file, struct iv_cand *cand)
 490 {
 491   struct iv *iv = cand->iv;
 492
 493   fprintf (file, "candidate %d%s\n",
 494            cand->id, cand->important ? " (important)" : "");
 495
 496   if (cand->depends_on)
 497     {
 498       fprintf (file, "  depends on ");
 499       dump_bitmap (file, cand->depends_on);
 500     }
 501
 502   if (!iv)
 503     {
 504       fprintf (file, "  final value replacement\n");
 505       return;
 506     }
 507
 508   switch (cand->pos)
 509     {
 510     case IP_NORMAL:
 511       fprintf (file, "  incremented before exit test\n");
 512       break;
 513
 514     case IP_END:
 515       fprintf (file, "  incremented at end\n");
 516       break;
 517
 518     case IP_ORIGINAL:
 519       fprintf (file, "  original biv\n");
 520       break;
 521     }
 522
 523   dump_iv (file, iv);
 524 }
 525
 526 /* Returns the info for ssa version VER.  */
 527
 528 static inline struct version_info *
 529 ver_info (struct ivopts_data *data, unsigned ver)
 530 {
 531   return data->version_info + ver;
 532 }
 533
 534 /* Returns the info for ssa name NAME.  */
 535
 536 static inline struct version_info *
 537 name_info (struct ivopts_data *data, tree name)
 538 {
 539   return ver_info (data, SSA_NAME_VERSION (name));
 540 }
 541
 542 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 543    emitted in LOOP.  */
 544
 545 static bool
 546 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 547 {
 548   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 549
 550   gcc_assert (bb);
 551
 552   if (sbb == loop->latch)
 553     return true;
 554
 555   if (sbb != bb)
 556     return false;
 557
 558   return stmt == last_stmt (bb);
 559 }
 560
 561 /* Returns true if STMT if after the place where the original induction
 562    variable CAND is incremented.  */
 563
 564 static bool
 565 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 566 {
 567   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 568   basic_block stmt_bb = bb_for_stmt (stmt);
 569   block_stmt_iterator bsi;
 570
 571   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 572     return false;
 573
 574   if (stmt_bb != cand_bb)
 575     return true;
 576
 577   /* Scan the block from the end, since the original ivs are usually
 578      incremented at the end of the loop body.  */
 579   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 580     {
 581       if (bsi_stmt (bsi) == cand->incremented_at)
 582         return false;
 583       if (bsi_stmt (bsi) == stmt)
 584         return true;
 585     }
 586 }
 587
 588 /* Returns true if STMT if after the place where the induction variable
 589    CAND is incremented in LOOP.  */
 590
 591 static bool
 592 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 593 {
 594   switch (cand->pos)
 595     {
 596     case IP_END:
 597       return false;
 598
 599     case IP_NORMAL:
 600       return stmt_after_ip_normal_pos (loop, stmt);
 601
 602     case IP_ORIGINAL:
 603       return stmt_after_ip_original_pos (cand, stmt);
 604
 605     default:
 606       gcc_unreachable ();
 607     }
 608 }
 609
 610 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 611
 612 static bool
 613 abnormal_ssa_name_p (tree exp)
 614 {
 615   if (!exp)
 616     return false;
 617
 618   if (TREE_CODE (exp) != SSA_NAME)
 619     return false;
 620
 621   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 622 }
 623
 624 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 625    abnormal phi node.  Callback for for_each_index.  */
 626
 627 static bool
 628 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 629                                   void *data ATTRIBUTE_UNUSED)
 630 {
 631   if (TREE_CODE (base) == ARRAY_REF)
 632     {
 633       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 634         return false;
 635       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 636         return false;
 637     }
 638
 639   return !abnormal_ssa_name_p (*index);
 640 }
 641
 642 /* Returns true if EXPR contains a ssa name that occurs in an
 643    abnormal phi node.  */
 644
 645 bool
 646 contains_abnormal_ssa_name_p (tree expr)
 647 {
 648   enum tree_code code;
 649   enum tree_code_class codeclass;
 650
 651   if (!expr)
 652     return false;
 653
 654   code = TREE_CODE (expr);
 655   codeclass = TREE_CODE_CLASS (code);
 656
 657   if (code == SSA_NAME)
 658     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 659
 660   if (code == INTEGER_CST
 661       || is_gimple_min_invariant (expr))
 662     return false;
 663
 664   if (code == ADDR_EXPR)
 665     return !for_each_index (&TREE_OPERAND (expr, 0),
 666                             idx_contains_abnormal_ssa_name_p,
 667                             NULL);
 668
 669   switch (codeclass)
 670     {
 671     case tcc_binary:
 672     case tcc_comparison:
 673       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 674         return true;
 675
 676       /* Fallthru.  */
 677     case tcc_unary:
 678       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 679         return true;
 680
 681       break;
 682
 683     default:
 684       gcc_unreachable ();
 685     }
 686
 687   return false;
 688 }
 689
 690 /*  Returns tree describing number of iterations determined from
 691     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 692
 693 static tree
 694 niter_for_exit (struct ivopts_data *data, edge exit)
 695 {
 696   struct tree_niter_desc desc;
 697   tree niter;
 698   void **slot;
 699
 700   if (!data->niters)
 701     {
 702       data->niters = pointer_map_create ();
 703       slot = NULL;
 704     }
 705   else
 706     slot = pointer_map_contains (data->niters, exit);
 707
 708   if (!slot)
 709     {
 710       /* Try to determine number of iterations.  We must know it
 711          unconditionally (i.e., without possibility of # of iterations
 712          being zero).  Also, we cannot safely work with ssa names that
 713          appear in phi nodes on abnormal edges, so that we do not create
 714          overlapping life ranges for them (PR 27283).  */
 715       if (number_of_iterations_exit (data->current_loop,
 716                                      exit, &desc, true)
 717           && integer_zerop (desc.may_be_zero)
 718           && !contains_abnormal_ssa_name_p (desc.niter))
 719         niter = desc.niter;
 720       else
 721         niter = NULL_TREE;
 722
 723       *pointer_map_insert (data->niters, exit) = niter;
 724     }
 725   else
 726     niter = (tree) *slot;
 727
 728   return niter;
 729 }
 730
 731 /* Returns tree describing number of iterations determined from
 732    single dominating exit of DATA->current_loop, or NULL if something
 733    goes wrong.  */
 734
 735 static tree
 736 niter_for_single_dom_exit (struct ivopts_data *data)
 737 {
 738   edge exit = single_dom_exit (data->current_loop);
 739
 740   if (!exit)
 741     return NULL;
 742
 743   return niter_for_exit (data, exit);
 744 }
 745
 746 /* Initializes data structures used by the iv optimization pass, stored
 747    in DATA.  */
 748
 749 static void
 750 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 751 {
 752   data->version_info_size = 2 * num_ssa_names;
 753   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 754   data->relevant = BITMAP_ALLOC (NULL);
 755   data->important_candidates = BITMAP_ALLOC (NULL);
 756   data->max_inv_id = 0;
 757   data->niters = NULL;
 758   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 759   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 760   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 761 }
 762
 763 /* Returns a memory object to that EXPR points.  In case we are able to
 764    determine that it does not point to any such object, NULL is returned.  */
 765
 766 static tree
 767 determine_base_object (tree expr)
 768 {
 769   enum tree_code code = TREE_CODE (expr);
 770   tree base, obj;
 771
 772   /* If this is a pointer casted to any type, we need to determine
 773      the base object for the pointer; so handle conversions before
 774      throwing away non-pointer expressions.  */
 775   if (CONVERT_EXPR_P (expr))
 776     return determine_base_object (TREE_OPERAND (expr, 0));
 777
 778   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 779     return NULL_TREE;
 780
 781   switch (code)
 782     {
 783     case INTEGER_CST:
 784       return NULL_TREE;
 785
 786     case ADDR_EXPR:
 787       obj = TREE_OPERAND (expr, 0);
 788       base = get_base_address (obj);
 789
 790       if (!base)
 791         return expr;
 792
 793       if (TREE_CODE (base) == INDIRECT_REF)
 794         return determine_base_object (TREE_OPERAND (base, 0));
 795
 796       return fold_convert (ptr_type_node,
 797                            build_fold_addr_expr (base));
 798
 799     case POINTER_PLUS_EXPR:
 800       return determine_base_object (TREE_OPERAND (expr, 0));
 801
 802     case PLUS_EXPR:
 803     case MINUS_EXPR:
 804       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 805       gcc_unreachable ();
 806
 807     default:
 808       return fold_convert (ptr_type_node, expr);
 809     }
 810 }
 811
 812 /* Allocates an induction variable with given initial value BASE and step STEP
 813    for loop LOOP.  */
 814
 815 static struct iv *
 816 alloc_iv (tree base, tree step)
 817 {
 818   struct iv *iv = XCNEW (struct iv);
 819   gcc_assert (step != NULL_TREE);
 820
 821   iv->base = base;
 822   iv->base_object = determine_base_object (base);
 823   iv->step = step;
 824   iv->biv_p = false;
 825   iv->have_use_for = false;
 826   iv->use_id = 0;
 827   iv->ssa_name = NULL_TREE;
 828
 829   return iv;
 830 }
 831
 832 /* Sets STEP and BASE for induction variable IV.  */
 833
 834 static void
 835 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 836 {
 837   struct version_info *info = name_info (data, iv);
 838
 839   gcc_assert (!info->iv);
 840
 841   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 842   info->iv = alloc_iv (base, step);
 843   info->iv->ssa_name = iv;
 844 }
 845
 846 /* Finds induction variable declaration for VAR.  */
 847
 848 static struct iv *
 849 get_iv (struct ivopts_data *data, tree var)
 850 {
 851   basic_block bb;
 852   tree type = TREE_TYPE (var);
 853
 854   if (!POINTER_TYPE_P (type)
 855       && !INTEGRAL_TYPE_P (type))
 856     return NULL;
 857
 858   if (!name_info (data, var)->iv)
 859     {
 860       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 861
 862       if (!bb
 863           || !flow_bb_inside_loop_p (data->current_loop, bb))
 864         set_iv (data, var, var, build_int_cst (type, 0));
 865     }
 866
 867   return name_info (data, var)->iv;
 868 }
 869
 870 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 871    not define a simple affine biv with nonzero step.  */
 872
 873 static tree
 874 determine_biv_step (tree phi)
 875 {
 876   struct loop *loop = bb_for_stmt (phi)->loop_father;
 877   tree name = PHI_RESULT (phi);
 878   affine_iv iv;
 879
 880   if (!is_gimple_reg (name))
 881     return NULL_TREE;
 882
 883   if (!simple_iv (loop, phi, name, &iv, true))
 884     return NULL_TREE;
 885
 886   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 887 }
 888
 889 /* Finds basic ivs.  */
 890
 891 static bool
 892 find_bivs (struct ivopts_data *data)
 893 {
 894   tree phi, step, type, base;
 895   bool found = false;
 896   struct loop *loop = data->current_loop;
 897
 898   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 899     {
 900       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 901         continue;
 902
 903       step = determine_biv_step (phi);
 904       if (!step)
 905         continue;
 906
 907       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 908       base = expand_simple_operations (base);
 909       if (contains_abnormal_ssa_name_p (base)
 910           || contains_abnormal_ssa_name_p (step))
 911         continue;
 912
 913       type = TREE_TYPE (PHI_RESULT (phi));
 914       base = fold_convert (type, base);
 915       if (step)
 916         {
 917           if (POINTER_TYPE_P (type))
 918             step = fold_convert (sizetype, step);
 919           else
 920             step = fold_convert (type, step);
 921         }
 922
 923       set_iv (data, PHI_RESULT (phi), base, step);
 924       found = true;
 925     }
 926
 927   return found;
 928 }
 929
 930 /* Marks basic ivs.  */
 931
 932 static void
 933 mark_bivs (struct ivopts_data *data)
 934 {
 935   tree phi, var;
 936   struct iv *iv, *incr_iv;
 937   struct loop *loop = data->current_loop;
 938   basic_block incr_bb;
 939
 940   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 941     {
 942       iv = get_iv (data, PHI_RESULT (phi));
 943       if (!iv)
 944         continue;
 945
 946       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 947       incr_iv = get_iv (data, var);
 948       if (!incr_iv)
 949         continue;
 950
 951       /* If the increment is in the subloop, ignore it.  */
 952       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 953       if (incr_bb->loop_father != data->current_loop
 954           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 955         continue;
 956
 957       iv->biv_p = true;
 958       incr_iv->biv_p = true;
 959     }
 960 }
 961
 962 /* Checks whether STMT defines a linear induction variable and stores its
 963    parameters to IV.  */
 964
 965 static bool
 966 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 967 {
 968   tree lhs;
 969   struct loop *loop = data->current_loop;
 970
 971   iv->base = NULL_TREE;
 972   iv->step = NULL_TREE;
 973
 974   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 975     return false;
 976
 977   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 978   if (TREE_CODE (lhs) != SSA_NAME)
 979     return false;
 980
 981   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
 982     return false;
 983   iv->base = expand_simple_operations (iv->base);
 984
 985   if (contains_abnormal_ssa_name_p (iv->base)
 986       || contains_abnormal_ssa_name_p (iv->step))
 987     return false;
 988
 989   return true;
 990 }
 991
 992 /* Finds general ivs in statement STMT.  */
 993
 994 static void
 995 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
 996 {
 997   affine_iv iv;
 998
 999   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1000     return;
1001
1002   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1003 }
1004
1005 /* Finds general ivs in basic block BB.  */
1006
1007 static void
1008 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1009 {
1010   block_stmt_iterator bsi;
1011
1012   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1013     find_givs_in_stmt (data, bsi_stmt (bsi));
1014 }
1015
1016 /* Finds general ivs.  */
1017
1018 static void
1019 find_givs (struct ivopts_data *data)
1020 {
1021   struct loop *loop = data->current_loop;
1022   basic_block *body = get_loop_body_in_dom_order (loop);
1023   unsigned i;
1024
1025   for (i = 0; i < loop->num_nodes; i++)
1026     find_givs_in_bb (data, body[i]);
1027   free (body);
1028 }
1029
1030 /* For each ssa name defined in LOOP determines whether it is an induction
1031    variable and if so, its initial value and step.  */
1032
1033 static bool
1034 find_induction_variables (struct ivopts_data *data)
1035 {
1036   unsigned i;
1037   bitmap_iterator bi;
1038
1039   if (!find_bivs (data))
1040     return false;
1041
1042   find_givs (data);
1043   mark_bivs (data);
1044
1045   if (dump_file && (dump_flags & TDF_DETAILS))
1046     {
1047       tree niter = niter_for_single_dom_exit (data);
1048
1049       if (niter)
1050         {
1051           fprintf (dump_file, "  number of iterations ");
1052           print_generic_expr (dump_file, niter, TDF_SLIM);
1053           fprintf (dump_file, "\n\n");
1054         };
1055
1056       fprintf (dump_file, "Induction variables:\n\n");
1057
1058       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1059         {
1060           if (ver_info (data, i)->iv)
1061             dump_iv (dump_file, ver_info (data, i)->iv);
1062         }
1063     }
1064
1065   return true;
1066 }
1067
1068 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1069
1070 static struct iv_use *
1071 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1072             tree stmt, enum use_type use_type)
1073 {
1074   struct iv_use *use = XCNEW (struct iv_use);
1075
1076   use->id = n_iv_uses (data);
1077   use->type = use_type;
1078   use->iv = iv;
1079   use->stmt = stmt;
1080   use->op_p = use_p;
1081   use->related_cands = BITMAP_ALLOC (NULL);
1082
1083   /* To avoid showing ssa name in the dumps, if it was not reset by the
1084      caller.  */
1085   iv->ssa_name = NULL_TREE;
1086
1087   if (dump_file && (dump_flags & TDF_DETAILS))
1088     dump_use (dump_file, use);
1089
1090   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1091
1092   return use;
1093 }
1094
1095 /* Checks whether OP is a loop-level invariant and if so, records it.
1096    NONLINEAR_USE is true if the invariant is used in a way we do not
1097    handle specially.  */
1098
1099 static void
1100 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1101 {
1102   basic_block bb;
1103   struct version_info *info;
1104
1105   if (TREE_CODE (op) != SSA_NAME
1106       || !is_gimple_reg (op))
1107     return;
1108
1109   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1110   if (bb
1111       && flow_bb_inside_loop_p (data->current_loop, bb))
1112     return;
1113
1114   info = name_info (data, op);
1115   info->name = op;
1116   info->has_nonlin_use |= nonlinear_use;
1117   if (!info->inv_id)
1118     info->inv_id = ++data->max_inv_id;
1119   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1120 }
1121
1122 /* Checks whether the use OP is interesting and if so, records it.  */
1123
1124 static struct iv_use *
1125 find_interesting_uses_op (struct ivopts_data *data, tree op)
1126 {
1127   struct iv *iv;
1128   struct iv *civ;
1129   tree stmt;
1130   struct iv_use *use;
1131
1132   if (TREE_CODE (op) != SSA_NAME)
1133     return NULL;
1134
1135   iv = get_iv (data, op);
1136   if (!iv)
1137     return NULL;
1138
1139   if (iv->have_use_for)
1140     {
1141       use = iv_use (data, iv->use_id);
1142
1143       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1144       return use;
1145     }
1146
1147   if (integer_zerop (iv->step))
1148     {
1149       record_invariant (data, op, true);
1150       return NULL;
1151     }
1152   iv->have_use_for = true;
1153
1154   civ = XNEW (struct iv);
1155   *civ = *iv;
1156
1157   stmt = SSA_NAME_DEF_STMT (op);
1158   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1159               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1160
1161   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1162   iv->use_id = use->id;
1163
1164   return use;
1165 }
1166
1167 /* Given a condition *COND_P, checks whether it is a compare of an induction
1168    variable and an invariant.  If this is the case, CONTROL_VAR is set
1169    to location of the iv, BOUND to the location of the invariant,
1170    IV_VAR and IV_BOUND are set to the corresponding induction variable
1171    descriptions, and true is returned.  If this is not the case,
1172    CONTROL_VAR and BOUND are set to the arguments of the condition and
1173    false is returned.  */
1174
1175 static bool
1176 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1177                        tree **control_var, tree **bound,
1178                        struct iv **iv_var, struct iv **iv_bound)
1179 {
1180   /* The nodes returned when COND has just one operand.  Note that you should
1181      not modify anything in BOUND or IV_BOUND because of this.  */
1182   static struct iv const_iv;
1183   static tree zero;
1184   tree cond = *cond_p;
1185   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1186   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1187   bool ret = false;
1188
1189   zero = integer_zero_node;
1190   const_iv.step = integer_zero_node;
1191
1192   if (TREE_CODE (cond) == SSA_NAME)
1193     {
1194       op0 = cond_p;
1195       iv0 = get_iv (data, cond);
1196       ret = (iv0 && !integer_zerop (iv0->step));
1197       goto end;
1198     }
1199
1200   if (!COMPARISON_CLASS_P (cond))
1201     {
1202       op0 = cond_p;
1203       goto end;
1204     }
1205
1206   op0 = &TREE_OPERAND (cond, 0);
1207   op1 = &TREE_OPERAND (cond, 1);
1208   if (TREE_CODE (*op0) == SSA_NAME)
1209     iv0 = get_iv (data, *op0);
1210   if (TREE_CODE (*op1) == SSA_NAME)
1211     iv1 = get_iv (data, *op1);
1212
1213   /* Exactly one of the compared values must be an iv, and the other one must
1214      be an invariant.  */
1215   if (!iv0 || !iv1)
1216     goto end;
1217
1218   if (integer_zerop (iv0->step))
1219     {
1220       /* Control variable may be on the other side.  */
1221       tmp_op = op0; op0 = op1; op1 = tmp_op;
1222       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1223     }
1224   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1225
1226 end:
1227   if (control_var)
1228     *control_var = op0;;
1229   if (iv_var)
1230     *iv_var = iv0;;
1231   if (bound)
1232     *bound = op1;
1233   if (iv_bound)
1234     *iv_bound = iv1;
1235
1236   return ret;
1237 }
1238
1239 /* Checks whether the condition *COND_P in STMT is interesting
1240    and if so, records it.  */
1241
1242 static void
1243 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1244 {
1245   tree *var_p, *bound_p;
1246   struct iv *var_iv, *civ;
1247
1248   if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1249     {
1250       find_interesting_uses_op (data, *var_p);
1251       find_interesting_uses_op (data, *bound_p);
1252       return;
1253     }
1254
1255   civ = XNEW (struct iv);
1256   *civ = *var_iv;
1257   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1258 }
1259
1260 /* Returns true if expression EXPR is obviously invariant in LOOP,
1261    i.e. if all its operands are defined outside of the LOOP.  LOOP
1262    should not be the function body.  */
1263
1264 bool
1265 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1266 {
1267   basic_block def_bb;
1268   unsigned i, len;
1269
1270   gcc_assert (loop_depth (loop) > 0);
1271
1272   if (is_gimple_min_invariant (expr))
1273     return true;
1274
1275   if (TREE_CODE (expr) == SSA_NAME)
1276     {
1277       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1278       if (def_bb
1279           && flow_bb_inside_loop_p (loop, def_bb))
1280         return false;
1281
1282       return true;
1283     }
1284
1285   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1286     return false;
1287
1288   len = TREE_OPERAND_LENGTH (expr);
1289   for (i = 0; i < len; i++)
1290     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1291       return false;
1292
1293   return true;
1294 }
1295
1296 /* Cumulates the steps of indices into DATA and replaces their values with the
1297    initial ones.  Returns false when the value of the index cannot be determined.
1298    Callback for for_each_index.  */
1299
1300 struct ifs_ivopts_data
1301 {
1302   struct ivopts_data *ivopts_data;
1303   tree stmt;
1304   tree step;
1305 };
1306
1307 static bool
1308 idx_find_step (tree base, tree *idx, void *data)
1309 {
1310   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1311   struct iv *iv;
1312   tree step, iv_base, iv_step, lbound, off;
1313   struct loop *loop = dta->ivopts_data->current_loop;
1314
1315   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1316       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1317     return false;
1318
1319   /* If base is a component ref, require that the offset of the reference
1320      be invariant.  */
1321   if (TREE_CODE (base) == COMPONENT_REF)
1322     {
1323       off = component_ref_field_offset (base);
1324       return expr_invariant_in_loop_p (loop, off);
1325     }
1326
1327   /* If base is array, first check whether we will be able to move the
1328      reference out of the loop (in order to take its address in strength
1329      reduction).  In order for this to work we need both lower bound
1330      and step to be loop invariants.  */
1331   if (TREE_CODE (base) == ARRAY_REF)
1332     {
1333       step = array_ref_element_size (base);
1334       lbound = array_ref_low_bound (base);
1335
1336       if (!expr_invariant_in_loop_p (loop, step)
1337           || !expr_invariant_in_loop_p (loop, lbound))
1338         return false;
1339     }
1340
1341   if (TREE_CODE (*idx) != SSA_NAME)
1342     return true;
1343
1344   iv = get_iv (dta->ivopts_data, *idx);
1345   if (!iv)
1346     return false;
1347
1348   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1349           *&x[0], which is not folded and does not trigger the
1350           ARRAY_REF path below.  */
1351   *idx = iv->base;
1352
1353   if (integer_zerop (iv->step))
1354     return true;
1355
1356   if (TREE_CODE (base) == ARRAY_REF)
1357     {
1358       step = array_ref_element_size (base);
1359
1360       /* We only handle addresses whose step is an integer constant.  */
1361       if (TREE_CODE (step) != INTEGER_CST)
1362         return false;
1363     }
1364   else
1365     /* The step for pointer arithmetics already is 1 byte.  */
1366     step = build_int_cst (sizetype, 1);
1367
1368   iv_base = iv->base;
1369   iv_step = iv->step;
1370   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1371                             sizetype, &iv_base, &iv_step, dta->stmt,
1372                             false))
1373     {
1374       /* The index might wrap.  */
1375       return false;
1376     }
1377
1378   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1379   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1380
1381   return true;
1382 }
1383
1384 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1385    object is passed to it in DATA.  */
1386
1387 static bool
1388 idx_record_use (tree base, tree *idx,
1389                 void *vdata)
1390 {
1391   struct ivopts_data *data = (struct ivopts_data *) vdata;
1392   find_interesting_uses_op (data, *idx);
1393   if (TREE_CODE (base) == ARRAY_REF)
1394     {
1395       find_interesting_uses_op (data, array_ref_element_size (base));
1396       find_interesting_uses_op (data, array_ref_low_bound (base));
1397     }
1398   return true;
1399 }
1400
1401 /* If we can prove that TOP = cst * BOT for some constant cst,
1402    store cst to MUL and return true.  Otherwise return false.
1403    The returned value is always sign-extended, regardless of the
1404    signedness of TOP and BOT.  */
1405
1406 static bool
1407 constant_multiple_of (tree top, tree bot, double_int *mul)
1408 {
1409   tree mby;
1410   enum tree_code code;
1411   double_int res, p0, p1;
1412   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1413
1414   STRIP_NOPS (top);
1415   STRIP_NOPS (bot);
1416
1417   if (operand_equal_p (top, bot, 0))
1418     {
1419       *mul = double_int_one;
1420       return true;
1421     }
1422
1423   code = TREE_CODE (top);
1424   switch (code)
1425     {
1426     case MULT_EXPR:
1427       mby = TREE_OPERAND (top, 1);
1428       if (TREE_CODE (mby) != INTEGER_CST)
1429         return false;
1430
1431       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1432         return false;
1433
1434       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1435                               precision);
1436       return true;
1437
1438     case PLUS_EXPR:
1439     case MINUS_EXPR:
1440       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1441           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1442         return false;
1443
1444       if (code == MINUS_EXPR)
1445         p1 = double_int_neg (p1);
1446       *mul = double_int_sext (double_int_add (p0, p1), precision);
1447       return true;
1448
1449     case INTEGER_CST:
1450       if (TREE_CODE (bot) != INTEGER_CST)
1451         return false;
1452
1453       p0 = double_int_sext (tree_to_double_int (top), precision);
1454       p1 = double_int_sext (tree_to_double_int (bot), precision);
1455       if (double_int_zero_p (p1))
1456         return false;
1457       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1458                               precision);
1459       return double_int_zero_p (res);
1460
1461     default:
1462       return false;
1463     }
1464 }
1465
1466 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1467
1468 static bool
1469 may_be_unaligned_p (tree ref, tree step)
1470 {
1471   tree base;
1472   tree base_type;
1473   HOST_WIDE_INT bitsize;
1474   HOST_WIDE_INT bitpos;
1475   tree toffset;
1476   enum machine_mode mode;
1477   int unsignedp, volatilep;
1478   unsigned base_align;
1479
1480   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1481      thus they are not misaligned.  */
1482   if (TREE_CODE (ref) == TARGET_MEM_REF)
1483     return false;
1484
1485   /* The test below is basically copy of what expr.c:normal_inner_ref
1486      does to check whether the object must be loaded by parts when
1487      STRICT_ALIGNMENT is true.  */
1488   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1489                               &unsignedp, &volatilep, true);
1490   base_type = TREE_TYPE (base);
1491   base_align = TYPE_ALIGN (base_type);
1492
1493   if (mode != BLKmode)
1494     {
1495       double_int mul;
1496       tree al = build_int_cst (TREE_TYPE (step),
1497                                GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1498
1499       if (base_align < GET_MODE_ALIGNMENT (mode)
1500           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1501           || bitpos % BITS_PER_UNIT != 0)
1502         return true;
1503
1504       if (!constant_multiple_of (step, al, &mul))
1505         return true;
1506     }
1507
1508   return false;
1509 }
1510
1511 /* Return true if EXPR may be non-addressable.   */
1512
1513 static bool
1514 may_be_nonaddressable_p (tree expr)
1515 {
1516   switch (TREE_CODE (expr))
1517     {
1518     case TARGET_MEM_REF:
1519       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1520          target, thus they are always addressable.  */
1521       return false;
1522
1523     case COMPONENT_REF:
1524       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1525              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1526
1527     case VIEW_CONVERT_EXPR:
1528       /* This kind of view-conversions may wrap non-addressable objects
1529          and make them look addressable.  After some processing the
1530          non-addressability may be uncovered again, causing ADDR_EXPRs
1531          of inappropriate objects to be built.  */
1532       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1533           || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
1534         return true;
1535
1536       /* ... fall through ... */
1537
1538     case ARRAY_REF:
1539     case ARRAY_RANGE_REF:
1540       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1541
1542     CASE_CONVERT:
1543       return true;
1544
1545     default:
1546       break;
1547     }
1548
1549   return false;
1550 }
1551
1552 /* Finds addresses in *OP_P inside STMT.  */
1553
1554 static void
1555 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1556 {
1557   tree base = *op_p, step = build_int_cst (sizetype, 0);
1558   struct iv *civ;
1559   struct ifs_ivopts_data ifs_ivopts_data;
1560
1561   /* Do not play with volatile memory references.  A bit too conservative,
1562      perhaps, but safe.  */
1563   if (stmt_ann (stmt)->has_volatile_ops)
1564     goto fail;
1565
1566   /* Ignore bitfields for now.  Not really something terribly complicated
1567      to handle.  TODO.  */
1568   if (TREE_CODE (base) == BIT_FIELD_REF)
1569     goto fail;
1570
1571   base = unshare_expr (base);
1572
1573   if (TREE_CODE (base) == TARGET_MEM_REF)
1574     {
1575       tree type = build_pointer_type (TREE_TYPE (base));
1576       tree astep;
1577
1578       if (TMR_BASE (base)
1579           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1580         {
1581           civ = get_iv (data, TMR_BASE (base));
1582           if (!civ)
1583             goto fail;
1584
1585           TMR_BASE (base) = civ->base;
1586           step = civ->step;
1587         }
1588       if (TMR_INDEX (base)
1589           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1590         {
1591           civ = get_iv (data, TMR_INDEX (base));
1592           if (!civ)
1593             goto fail;
1594
1595           TMR_INDEX (base) = civ->base;
1596           astep = civ->step;
1597
1598           if (astep)
1599             {
1600               if (TMR_STEP (base))
1601                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1602
1603               step = fold_build2 (PLUS_EXPR, type, step, astep);
1604             }
1605         }
1606
1607       if (integer_zerop (step))
1608         goto fail;
1609       base = tree_mem_ref_addr (type, base);
1610     }
1611   else
1612     {
1613       ifs_ivopts_data.ivopts_data = data;
1614       ifs_ivopts_data.stmt = stmt;
1615       ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1616       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1617           || integer_zerop (ifs_ivopts_data.step))
1618         goto fail;
1619       step = ifs_ivopts_data.step;
1620
1621       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1622       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1623
1624       /* Check that the base expression is addressable.  This needs
1625          to be done after substituting bases of IVs into it.  */
1626       if (may_be_nonaddressable_p (base))
1627         goto fail;
1628
1629       /* Moreover, on strict alignment platforms, check that it is
1630          sufficiently aligned.  */
1631       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1632         goto fail;
1633
1634       base = build_fold_addr_expr (base);
1635
1636       /* Substituting bases of IVs into the base expression might
1637          have caused folding opportunities.  */
1638       if (TREE_CODE (base) == ADDR_EXPR)
1639         {
1640           tree *ref = &TREE_OPERAND (base, 0);
1641           while (handled_component_p (*ref))
1642             ref = &TREE_OPERAND (*ref, 0);
1643           if (TREE_CODE (*ref) == INDIRECT_REF)
1644             *ref = fold_indirect_ref (*ref);
1645         }
1646     }
1647
1648   civ = alloc_iv (base, step);
1649   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1650   return;
1651
1652 fail:
1653   for_each_index (op_p, idx_record_use, data);
1654 }
1655
1656 /* Finds and records invariants used in STMT.  */
1657
1658 static void
1659 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1660 {
1661   ssa_op_iter iter;
1662   use_operand_p use_p;
1663   tree op;
1664
1665   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1666     {
1667       op = USE_FROM_PTR (use_p);
1668       record_invariant (data, op, false);
1669     }
1670 }
1671
1672 /* Finds interesting uses of induction variables in the statement STMT.  */
1673
1674 static void
1675 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1676 {
1677   struct iv *iv;
1678   tree op, lhs, rhs;
1679   ssa_op_iter iter;
1680   use_operand_p use_p;
1681
1682   find_invariants_stmt (data, stmt);
1683
1684   if (TREE_CODE (stmt) == COND_EXPR)
1685     {
1686       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1687       return;
1688     }
1689
1690   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1691     {
1692       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1693       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1694
1695       if (TREE_CODE (lhs) == SSA_NAME)
1696         {
1697           /* If the statement defines an induction variable, the uses are not
1698              interesting by themselves.  */
1699
1700           iv = get_iv (data, lhs);
1701
1702           if (iv && !integer_zerop (iv->step))
1703             return;
1704         }
1705
1706       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1707         {
1708         case tcc_comparison:
1709           find_interesting_uses_cond (data, stmt,
1710                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1711           return;
1712
1713         case tcc_reference:
1714           find_interesting_uses_address (data, stmt,
1715                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1716           if (REFERENCE_CLASS_P (lhs))
1717             find_interesting_uses_address (data, stmt,
1718                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1719           return;
1720
1721         default: ;
1722         }
1723
1724       if (REFERENCE_CLASS_P (lhs)
1725           && is_gimple_val (rhs))
1726         {
1727           find_interesting_uses_address (data, stmt,
1728                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1729           find_interesting_uses_op (data, rhs);
1730           return;
1731         }
1732
1733       /* TODO -- we should also handle address uses of type
1734
1735          memory = call (whatever);
1736
1737          and
1738
1739          call (memory).  */
1740     }
1741
1742   if (TREE_CODE (stmt) == PHI_NODE
1743       && bb_for_stmt (stmt) == data->current_loop->header)
1744     {
1745       lhs = PHI_RESULT (stmt);
1746       iv = get_iv (data, lhs);
1747
1748       if (iv && !integer_zerop (iv->step))
1749         return;
1750     }
1751
1752   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1753     {
1754       op = USE_FROM_PTR (use_p);
1755
1756       if (TREE_CODE (op) != SSA_NAME)
1757         continue;
1758
1759       iv = get_iv (data, op);
1760       if (!iv)
1761         continue;
1762
1763       find_interesting_uses_op (data, op);
1764     }
1765 }
1766
1767 /* Finds interesting uses of induction variables outside of loops
1768    on loop exit edge EXIT.  */
1769
1770 static void
1771 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1772 {
1773   tree phi, def;
1774
1775   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1776     {
1777       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1778       if (is_gimple_reg (def))
1779         find_interesting_uses_op (data, def);
1780     }
1781 }
1782
1783 /* Finds uses of the induction variables that are interesting.  */
1784
1785 static void
1786 find_interesting_uses (struct ivopts_data *data)
1787 {
1788   basic_block bb;
1789   block_stmt_iterator bsi;
1790   tree phi;
1791   basic_block *body = get_loop_body (data->current_loop);
1792   unsigned i;
1793   struct version_info *info;
1794   edge e;
1795
1796   if (dump_file && (dump_flags & TDF_DETAILS))
1797     fprintf (dump_file, "Uses:\n\n");
1798
1799   for (i = 0; i < data->current_loop->num_nodes; i++)
1800     {
1801       edge_iterator ei;
1802       bb = body[i];
1803
1804       FOR_EACH_EDGE (e, ei, bb->succs)
1805         if (e->dest != EXIT_BLOCK_PTR
1806             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1807           find_interesting_uses_outside (data, e);
1808
1809       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1810         find_interesting_uses_stmt (data, phi);
1811       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1812         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1813     }
1814
1815   if (dump_file && (dump_flags & TDF_DETAILS))
1816     {
1817       bitmap_iterator bi;
1818
1819       fprintf (dump_file, "\n");
1820
1821       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1822         {
1823           info = ver_info (data, i);
1824           if (info->inv_id)
1825             {
1826               fprintf (dump_file, "  ");
1827               print_generic_expr (dump_file, info->name, TDF_SLIM);
1828               fprintf (dump_file, " is invariant (%d)%s\n",
1829                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1830             }
1831         }
1832
1833       fprintf (dump_file, "\n");
1834     }
1835
1836   free (body);
1837 }
1838
1839 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1840    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1841    we are at the top-level of the processed address.  */
1842
1843 static tree
1844 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1845                 unsigned HOST_WIDE_INT *offset)
1846 {
1847   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1848   enum tree_code code;
1849   tree type, orig_type = TREE_TYPE (expr);
1850   unsigned HOST_WIDE_INT off0, off1, st;
1851   tree orig_expr = expr;
1852
1853   STRIP_NOPS (expr);
1854
1855   type = TREE_TYPE (expr);
1856   code = TREE_CODE (expr);
1857   *offset = 0;
1858
1859   switch (code)
1860     {
1861     case INTEGER_CST:
1862       if (!cst_and_fits_in_hwi (expr)
1863           || integer_zerop (expr))
1864         return orig_expr;
1865
1866       *offset = int_cst_value (expr);
1867       return build_int_cst (orig_type, 0);
1868
1869     case POINTER_PLUS_EXPR:
1870     case PLUS_EXPR:
1871     case MINUS_EXPR:
1872       op0 = TREE_OPERAND (expr, 0);
1873       op1 = TREE_OPERAND (expr, 1);
1874
1875       op0 = strip_offset_1 (op0, false, false, &off0);
1876       op1 = strip_offset_1 (op1, false, false, &off1);
1877
1878       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1879       if (op0 == TREE_OPERAND (expr, 0)
1880           && op1 == TREE_OPERAND (expr, 1))
1881         return orig_expr;
1882
1883       if (integer_zerop (op1))
1884         expr = op0;
1885       else if (integer_zerop (op0))
1886         {
1887           if (code == MINUS_EXPR)
1888             expr = fold_build1 (NEGATE_EXPR, type, op1);
1889           else
1890             expr = op1;
1891         }
1892       else
1893         expr = fold_build2 (code, type, op0, op1);
1894
1895       return fold_convert (orig_type, expr);
1896
1897     case ARRAY_REF:
1898       if (!inside_addr)
1899         return orig_expr;
1900
1901       step = array_ref_element_size (expr);
1902       if (!cst_and_fits_in_hwi (step))
1903         break;
1904
1905       st = int_cst_value (step);
1906       op1 = TREE_OPERAND (expr, 1);
1907       op1 = strip_offset_1 (op1, false, false, &off1);
1908       *offset = off1 * st;
1909
1910       if (top_compref
1911           && integer_zerop (op1))
1912         {
1913           /* Strip the component reference completely.  */
1914           op0 = TREE_OPERAND (expr, 0);
1915           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1916           *offset += off0;
1917           return op0;
1918         }
1919       break;
1920
1921     case COMPONENT_REF:
1922       if (!inside_addr)
1923         return orig_expr;
1924
1925       tmp = component_ref_field_offset (expr);
1926       if (top_compref
1927           && cst_and_fits_in_hwi (tmp))
1928         {
1929           /* Strip the component reference completely.  */
1930           op0 = TREE_OPERAND (expr, 0);
1931           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1932           *offset = off0 + int_cst_value (tmp);
1933           return op0;
1934         }
1935       break;
1936
1937     case ADDR_EXPR:
1938       op0 = TREE_OPERAND (expr, 0);
1939       op0 = strip_offset_1 (op0, true, true, &off0);
1940       *offset += off0;
1941
1942       if (op0 == TREE_OPERAND (expr, 0))
1943         return orig_expr;
1944
1945       expr = build_fold_addr_expr (op0);
1946       return fold_convert (orig_type, expr);
1947
1948     case INDIRECT_REF:
1949       inside_addr = false;
1950       break;
1951
1952     default:
1953       return orig_expr;
1954     }
1955
1956   /* Default handling of expressions for that we want to recurse into
1957      the first operand.  */
1958   op0 = TREE_OPERAND (expr, 0);
1959   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1960   *offset += off0;
1961
1962   if (op0 == TREE_OPERAND (expr, 0)
1963       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1964     return orig_expr;
1965
1966   expr = copy_node (expr);
1967   TREE_OPERAND (expr, 0) = op0;
1968   if (op1)
1969     TREE_OPERAND (expr, 1) = op1;
1970
1971   /* Inside address, we might strip the top level component references,
1972      thus changing type of the expression.  Handling of ADDR_EXPR
1973      will fix that.  */
1974   expr = fold_convert (orig_type, expr);
1975
1976   return expr;
1977 }
1978
1979 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1980
1981 static tree
1982 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1983 {
1984   return strip_offset_1 (expr, false, false, offset);
1985 }
1986
1987 /* Returns variant of TYPE that can be used as base for different uses.
1988    We return unsigned type with the same precision, which avoids problems
1989    with overflows.  */
1990
1991 static tree
1992 generic_type_for (tree type)
1993 {
1994   if (POINTER_TYPE_P (type))
1995     return unsigned_type_for (type);
1996
1997   if (TYPE_UNSIGNED (type))
1998     return type;
1999
2000   return unsigned_type_for (type);
2001 }
2002
2003 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2004    the bitmap to that we should store it.  */
2005
2006 static struct ivopts_data *fd_ivopts_data;
2007 static tree
2008 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2009 {
2010   bitmap *depends_on = (bitmap *) data;
2011   struct version_info *info;
2012
2013   if (TREE_CODE (*expr_p) != SSA_NAME)
2014     return NULL_TREE;
2015   info = name_info (fd_ivopts_data, *expr_p);
2016
2017   if (!info->inv_id || info->has_nonlin_use)
2018     return NULL_TREE;
2019
2020   if (!*depends_on)
2021     *depends_on = BITMAP_ALLOC (NULL);
2022   bitmap_set_bit (*depends_on, info->inv_id);
2023
2024   return NULL_TREE;
2025 }
2026
2027 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2028    position to POS.  If USE is not NULL, the candidate is set as related to
2029    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2030    replacement of the final value of the iv by a direct computation.  */
2031
2032 static struct iv_cand *
2033 add_candidate_1 (struct ivopts_data *data,
2034                  tree base, tree step, bool important, enum iv_position pos,
2035                  struct iv_use *use, tree incremented_at)
2036 {
2037   unsigned i;
2038   struct iv_cand *cand = NULL;
2039   tree type, orig_type;
2040
2041   if (base)
2042     {
2043       orig_type = TREE_TYPE (base);
2044       type = generic_type_for (orig_type);
2045       /* Don't convert the base to the generic type for pointers as the generic
2046          type is an integer type with the same size as the pointer type.  */
2047       if (type != orig_type && !POINTER_TYPE_P (orig_type))
2048         {
2049           base = fold_convert (type, base);
2050           step = fold_convert (type, step);
2051         }
2052     }
2053
2054   for (i = 0; i < n_iv_cands (data); i++)
2055     {
2056       cand = iv_cand (data, i);
2057
2058       if (cand->pos != pos)
2059         continue;
2060
2061       if (cand->incremented_at != incremented_at)
2062         continue;
2063
2064       if (!cand->iv)
2065         {
2066           if (!base && !step)
2067             break;
2068
2069           continue;
2070         }
2071
2072       if (!base && !step)
2073         continue;
2074
2075       if (operand_equal_p (base, cand->iv->base, 0)
2076           && operand_equal_p (step, cand->iv->step, 0))
2077         break;
2078     }
2079
2080   if (i == n_iv_cands (data))
2081     {
2082       cand = XCNEW (struct iv_cand);
2083       cand->id = i;
2084
2085       if (!base && !step)
2086         cand->iv = NULL;
2087       else
2088         cand->iv = alloc_iv (base, step);
2089
2090       cand->pos = pos;
2091       if (pos != IP_ORIGINAL && cand->iv)
2092         {
2093           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2094           cand->var_after = cand->var_before;
2095         }
2096       cand->important = important;
2097       cand->incremented_at = incremented_at;
2098       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2099
2100       if (step
2101           && TREE_CODE (step) != INTEGER_CST)
2102         {
2103           fd_ivopts_data = data;
2104           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2105         }
2106
2107       if (dump_file && (dump_flags & TDF_DETAILS))
2108         dump_cand (dump_file, cand);
2109     }
2110
2111   if (important && !cand->important)
2112     {
2113       cand->important = true;
2114       if (dump_file && (dump_flags & TDF_DETAILS))
2115         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2116     }
2117
2118   if (use)
2119     {
2120       bitmap_set_bit (use->related_cands, i);
2121       if (dump_file && (dump_flags & TDF_DETAILS))
2122         fprintf (dump_file, "Candidate %d is related to use %d\n",
2123                  cand->id, use->id);
2124     }
2125
2126   return cand;
2127 }
2128
2129 /* Returns true if incrementing the induction variable at the end of the LOOP
2130    is allowed.
2131
2132    The purpose is to avoid splitting latch edge with a biv increment, thus
2133    creating a jump, possibly confusing other optimization passes and leaving
2134    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2135    is not available (so we do not have a better alternative), or if the latch
2136    edge is already nonempty.  */
2137
2138 static bool
2139 allow_ip_end_pos_p (struct loop *loop)
2140 {
2141   if (!ip_normal_pos (loop))
2142     return true;
2143
2144   if (!empty_block_p (ip_end_pos (loop)))
2145     return true;
2146
2147   return false;
2148 }
2149
2150 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2151    position to POS.  If USE is not NULL, the candidate is set as related to
2152    it.  The candidate computation is scheduled on all available positions.  */
2153
2154 static void
2155 add_candidate (struct ivopts_data *data,
2156                tree base, tree step, bool important, struct iv_use *use)
2157 {
2158   if (ip_normal_pos (data->current_loop))
2159     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2160   if (ip_end_pos (data->current_loop)
2161       && allow_ip_end_pos_p (data->current_loop))
2162     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2163 }
2164
2165 /* Add a standard "0 + 1 * iteration" iv candidate for a
2166    type with SIZE bits.  */
2167
2168 static void
2169 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2170                                      unsigned int size)
2171 {
2172   tree type = lang_hooks.types.type_for_size (size, true);
2173   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2174                  true, NULL);
2175 }
2176
2177 /* Adds standard iv candidates.  */
2178
2179 static void
2180 add_standard_iv_candidates (struct ivopts_data *data)
2181 {
2182   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2183
2184   /* The same for a double-integer type if it is still fast enough.  */
2185   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2186     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2187 }
2188
2189
2190 /* Adds candidates bases on the old induction variable IV.  */
2191
2192 static void
2193 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2194 {
2195   tree phi, def;
2196   struct iv_cand *cand;
2197
2198   add_candidate (data, iv->base, iv->step, true, NULL);
2199
2200   /* The same, but with initial value zero.  */
2201   add_candidate (data,
2202                  build_int_cst (TREE_TYPE (iv->base), 0),
2203                  iv->step, true, NULL);
2204
2205   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2206   if (TREE_CODE (phi) == PHI_NODE)
2207     {
2208       /* Additionally record the possibility of leaving the original iv
2209          untouched.  */
2210       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2211       cand = add_candidate_1 (data,
2212                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2213                               SSA_NAME_DEF_STMT (def));
2214       cand->var_before = iv->ssa_name;
2215       cand->var_after = def;
2216     }
2217 }
2218
2219 /* Adds candidates based on the old induction variables.  */
2220
2221 static void
2222 add_old_ivs_candidates (struct ivopts_data *data)
2223 {
2224   unsigned i;
2225   struct iv *iv;
2226   bitmap_iterator bi;
2227
2228   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2229     {
2230       iv = ver_info (data, i)->iv;
2231       if (iv && iv->biv_p && !integer_zerop (iv->step))
2232         add_old_iv_candidates (data, iv);
2233     }
2234 }
2235
2236 /* Adds candidates based on the value of the induction variable IV and USE.  */
2237
2238 static void
2239 add_iv_value_candidates (struct ivopts_data *data,
2240                          struct iv *iv, struct iv_use *use)
2241 {
2242   unsigned HOST_WIDE_INT offset;
2243   tree base;
2244   tree basetype;
2245
2246   add_candidate (data, iv->base, iv->step, false, use);
2247
2248   /* The same, but with initial value zero.  Make such variable important,
2249      since it is generic enough so that possibly many uses may be based
2250      on it.  */
2251   basetype = TREE_TYPE (iv->base);
2252   if (POINTER_TYPE_P (basetype))
2253     basetype = sizetype;
2254   add_candidate (data, build_int_cst (basetype, 0),
2255                  iv->step, true, use);
2256
2257   /* Third, try removing the constant offset.  */
2258   base = strip_offset (iv->base, &offset);
2259   if (offset)
2260     add_candidate (data, base, iv->step, false, use);
2261 }
2262
2263 /* Adds candidates based on the uses.  */
2264
2265 static void
2266 add_derived_ivs_candidates (struct ivopts_data *data)
2267 {
2268   unsigned i;
2269
2270   for (i = 0; i < n_iv_uses (data); i++)
2271     {
2272       struct iv_use *use = iv_use (data, i);
2273
2274       if (!use)
2275         continue;
2276
2277       switch (use->type)
2278         {
2279         case USE_NONLINEAR_EXPR:
2280         case USE_COMPARE:
2281         case USE_ADDRESS:
2282           /* Just add the ivs based on the value of the iv used here.  */
2283           add_iv_value_candidates (data, use->iv, use);
2284           break;
2285
2286         default:
2287           gcc_unreachable ();
2288         }
2289     }
2290 }
2291
2292 /* Record important candidates and add them to related_cands bitmaps
2293    if needed.  */
2294
2295 static void
2296 record_important_candidates (struct ivopts_data *data)
2297 {
2298   unsigned i;
2299   struct iv_use *use;
2300
2301   for (i = 0; i < n_iv_cands (data); i++)
2302     {
2303       struct iv_cand *cand = iv_cand (data, i);
2304
2305       if (cand->important)
2306         bitmap_set_bit (data->important_candidates, i);
2307     }
2308
2309   data->consider_all_candidates = (n_iv_cands (data)
2310                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2311
2312   if (data->consider_all_candidates)
2313     {
2314       /* We will not need "related_cands" bitmaps in this case,
2315          so release them to decrease peak memory consumption.  */
2316       for (i = 0; i < n_iv_uses (data); i++)
2317         {
2318           use = iv_use (data, i);
2319           BITMAP_FREE (use->related_cands);
2320         }
2321     }
2322   else
2323     {
2324       /* Add important candidates to the related_cands bitmaps.  */
2325       for (i = 0; i < n_iv_uses (data); i++)
2326         bitmap_ior_into (iv_use (data, i)->related_cands,
2327                          data->important_candidates);
2328     }
2329 }
2330
2331 /* Finds the candidates for the induction variables.  */
2332
2333 static void
2334 find_iv_candidates (struct ivopts_data *data)
2335 {
2336   /* Add commonly used ivs.  */
2337   add_standard_iv_candidates (data);
2338
2339   /* Add old induction variables.  */
2340   add_old_ivs_candidates (data);
2341
2342   /* Add induction variables derived from uses.  */
2343   add_derived_ivs_candidates (data);
2344
2345   /* Record the important candidates.  */
2346   record_important_candidates (data);
2347 }
2348
2349 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2350    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2351    we allocate a simple list to every use.  */
2352
2353 static void
2354 alloc_use_cost_map (struct ivopts_data *data)
2355 {
2356   unsigned i, size, s, j;
2357
2358   for (i = 0; i < n_iv_uses (data); i++)
2359     {
2360       struct iv_use *use = iv_use (data, i);
2361       bitmap_iterator bi;
2362
2363       if (data->consider_all_candidates)
2364         size = n_iv_cands (data);
2365       else
2366         {
2367           s = 0;
2368           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2369             {
2370               s++;
2371             }
2372
2373           /* Round up to the power of two, so that moduling by it is fast.  */
2374           for (size = 1; size < s; size <<= 1)
2375             continue;
2376         }
2377
2378       use->n_map_members = size;
2379       use->cost_map = XCNEWVEC (struct cost_pair, size);
2380     }
2381 }
2382
2383 /* Returns description of computation cost of expression whose runtime
2384    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2385
2386 static comp_cost
2387 new_cost (unsigned runtime, unsigned complexity)
2388 {
2389   comp_cost cost;
2390
2391   cost.cost = runtime;
2392   cost.complexity = complexity;
2393
2394   return cost;
2395 }
2396
2397 /* Adds costs COST1 and COST2.  */
2398
2399 static comp_cost
2400 add_costs (comp_cost cost1, comp_cost cost2)
2401 {
2402   cost1.cost += cost2.cost;
2403   cost1.complexity += cost2.complexity;
2404
2405   return cost1;
2406 }
2407 /* Subtracts costs COST1 and COST2.  */
2408
2409 static comp_cost
2410 sub_costs (comp_cost cost1, comp_cost cost2)
2411 {
2412   cost1.cost -= cost2.cost;
2413   cost1.complexity -= cost2.complexity;
2414
2415   return cost1;
2416 }
2417
2418 /* Returns a negative number if COST1 < COST2, a positive number if
2419    COST1 > COST2, and 0 if COST1 = COST2.  */
2420
2421 static int
2422 compare_costs (comp_cost cost1, comp_cost cost2)
2423 {
2424   if (cost1.cost == cost2.cost)
2425     return cost1.complexity - cost2.complexity;
2426
2427   return cost1.cost - cost2.cost;
2428 }
2429
2430 /* Returns true if COST is infinite.  */
2431
2432 static bool
2433 infinite_cost_p (comp_cost cost)
2434 {
2435   return cost.cost == INFTY;
2436 }
2437
2438 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2439    on invariants DEPENDS_ON and that the value used in expressing it
2440    is VALUE.*/
2441
2442 static void
2443 set_use_iv_cost (struct ivopts_data *data,
2444                  struct iv_use *use, struct iv_cand *cand,
2445                  comp_cost cost, bitmap depends_on, tree value)
2446 {
2447   unsigned i, s;
2448
2449   if (infinite_cost_p (cost))
2450     {
2451       BITMAP_FREE (depends_on);
2452       return;
2453     }
2454
2455   if (data->consider_all_candidates)
2456     {
2457       use->cost_map[cand->id].cand = cand;
2458       use->cost_map[cand->id].cost = cost;
2459       use->cost_map[cand->id].depends_on = depends_on;
2460       use->cost_map[cand->id].value = value;
2461       return;
2462     }
2463
2464   /* n_map_members is a power of two, so this computes modulo.  */
2465   s = cand->id & (use->n_map_members - 1);
2466   for (i = s; i < use->n_map_members; i++)
2467     if (!use->cost_map[i].cand)
2468       goto found;
2469   for (i = 0; i < s; i++)
2470     if (!use->cost_map[i].cand)
2471       goto found;
2472
2473   gcc_unreachable ();
2474
2475 found:
2476   use->cost_map[i].cand = cand;
2477   use->cost_map[i].cost = cost;
2478   use->cost_map[i].depends_on = depends_on;
2479   use->cost_map[i].value = value;
2480 }
2481
2482 /* Gets cost of (USE, CANDIDATE) pair.  */
2483
2484 static struct cost_pair *
2485 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2486                  struct iv_cand *cand)
2487 {
2488   unsigned i, s;
2489   struct cost_pair *ret;
2490
2491   if (!cand)
2492     return NULL;
2493
2494   if (data->consider_all_candidates)
2495     {
2496       ret = use->cost_map + cand->id;
2497       if (!ret->cand)
2498         return NULL;
2499
2500       return ret;
2501     }
2502
2503   /* n_map_members is a power of two, so this computes modulo.  */
2504   s = cand->id & (use->n_map_members - 1);
2505   for (i = s; i < use->n_map_members; i++)
2506     if (use->cost_map[i].cand == cand)
2507       return use->cost_map + i;
2508
2509   for (i = 0; i < s; i++)
2510     if (use->cost_map[i].cand == cand)
2511       return use->cost_map + i;
2512
2513   return NULL;
2514 }
2515
2516 /* Returns estimate on cost of computing SEQ.  */
2517
2518 static unsigned
2519 seq_cost (rtx seq)
2520 {
2521   unsigned cost = 0;
2522   rtx set;
2523
2524   for (; seq; seq = NEXT_INSN (seq))
2525     {
2526       set = single_set (seq);
2527       if (set)
2528         cost += rtx_cost (set, SET);
2529       else
2530         cost++;
2531     }
2532
2533   return cost;
2534 }
2535
2536 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2537 static rtx
2538 produce_memory_decl_rtl (tree obj, int *regno)
2539 {
2540   rtx x;
2541
2542   gcc_assert (obj);
2543   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2544     {
2545       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2546       x = gen_rtx_SYMBOL_REF (Pmode, name);
2547       SET_SYMBOL_REF_DECL (x, obj);
2548       x = gen_rtx_MEM (DECL_MODE (obj), x);
2549       targetm.encode_section_info (obj, x, true);
2550     }
2551   else
2552     {
2553       x = gen_raw_REG (Pmode, (*regno)++);
2554       x = gen_rtx_MEM (DECL_MODE (obj), x);
2555     }
2556
2557   return x;
2558 }
2559
2560 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2561    walk_tree.  DATA contains the actual fake register number.  */
2562
2563 static tree
2564 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2565 {
2566   tree obj = NULL_TREE;
2567   rtx x = NULL_RTX;
2568   int *regno = (int *) data;
2569
2570   switch (TREE_CODE (*expr_p))
2571     {
2572     case ADDR_EXPR:
2573       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2574            handled_component_p (*expr_p);
2575            expr_p = &TREE_OPERAND (*expr_p, 0))
2576         continue;
2577       obj = *expr_p;
2578       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2579         x = produce_memory_decl_rtl (obj, regno);
2580       break;
2581
2582     case SSA_NAME:
2583       *ws = 0;
2584       obj = SSA_NAME_VAR (*expr_p);
2585       if (!DECL_RTL_SET_P (obj))
2586         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2587       break;
2588
2589     case VAR_DECL:
2590     case PARM_DECL:
2591     case RESULT_DECL:
2592       *ws = 0;
2593       obj = *expr_p;
2594
2595       if (DECL_RTL_SET_P (obj))
2596         break;
2597
2598       if (DECL_MODE (obj) == BLKmode)
2599         x = produce_memory_decl_rtl (obj, regno);
2600       else
2601         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2602
2603       break;
2604
2605     default:
2606       break;
2607     }
2608
2609   if (x)
2610     {
2611       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2612       SET_DECL_RTL (obj, x);
2613     }
2614
2615   return NULL_TREE;
2616 }
2617
2618 /* Determines cost of the computation of EXPR.  */
2619
2620 static unsigned
2621 computation_cost (tree expr)
2622 {
2623   rtx seq, rslt;
2624   tree type = TREE_TYPE (expr);
2625   unsigned cost;
2626   /* Avoid using hard regs in ways which may be unsupported.  */
2627   int regno = LAST_VIRTUAL_REGISTER + 1;
2628
2629   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2630   start_sequence ();
2631   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2632   seq = get_insns ();
2633   end_sequence ();
2634
2635   cost = seq_cost (seq);
2636   if (MEM_P (rslt))
2637     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2638
2639   return cost;
2640 }
2641
2642 /* Returns variable containing the value of candidate CAND at statement AT.  */
2643
2644 static tree
2645 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2646 {
2647   if (stmt_after_increment (loop, cand, stmt))
2648     return cand->var_after;
2649   else
2650     return cand->var_before;
2651 }
2652
2653 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2654    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2655
2656 int
2657 tree_int_cst_sign_bit (const_tree t)
2658 {
2659   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2660   unsigned HOST_WIDE_INT w;
2661
2662   if (bitno < HOST_BITS_PER_WIDE_INT)
2663     w = TREE_INT_CST_LOW (t);
2664   else
2665     {
2666       w = TREE_INT_CST_HIGH (t);
2667       bitno -= HOST_BITS_PER_WIDE_INT;
2668     }
2669
2670   return (w >> bitno) & 1;
2671 }
2672
2673 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2674    same precision that is at least as wide as the precision of TYPE, stores
2675    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2676    type of A and B.  */
2677
2678 static tree
2679 determine_common_wider_type (tree *a, tree *b)
2680 {
2681   tree wider_type = NULL;
2682   tree suba, subb;
2683   tree atype = TREE_TYPE (*a);
2684
2685   if (CONVERT_EXPR_P (*a))
2686     {
2687       suba = TREE_OPERAND (*a, 0);
2688       wider_type = TREE_TYPE (suba);
2689       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2690         return atype;
2691     }
2692   else
2693     return atype;
2694
2695   if (CONVERT_EXPR_P (*b))
2696     {
2697       subb = TREE_OPERAND (*b, 0);
2698       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2699         return atype;
2700     }
2701   else
2702     return atype;
2703
2704   *a = suba;
2705   *b = subb;
2706   return wider_type;
2707 }
2708
2709 /* Determines the expression by that USE is expressed from induction variable
2710    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2711    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2712
2713 static bool
2714 get_computation_aff (struct loop *loop,
2715                      struct iv_use *use, struct iv_cand *cand, tree at,
2716                      struct affine_tree_combination *aff)
2717 {
2718   tree ubase = use->iv->base;
2719   tree ustep = use->iv->step;
2720   tree cbase = cand->iv->base;
2721   tree cstep = cand->iv->step, cstep_common;
2722   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2723   tree common_type, var;
2724   tree uutype;
2725   aff_tree cbase_aff, var_aff;
2726   double_int rat;
2727
2728   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2729     {
2730       /* We do not have a precision to express the values of use.  */
2731       return false;
2732     }
2733
2734   var = var_at_stmt (loop, cand, at);
2735   uutype = unsigned_type_for (utype);
2736
2737   /* If the conversion is not noop, perform it.  */
2738   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2739     {
2740       cstep = fold_convert (uutype, cstep);
2741       cbase = fold_convert (uutype, cbase);
2742       var = fold_convert (uutype, var);
2743     }
2744
2745   if (!constant_multiple_of (ustep, cstep, &rat))
2746     return false;
2747
2748   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2749      type, we achieve better folding by computing their difference in this
2750      wider type, and cast the result to UUTYPE.  We do not need to worry about
2751      overflows, as all the arithmetics will in the end be performed in UUTYPE
2752      anyway.  */
2753   common_type = determine_common_wider_type (&ubase, &cbase);
2754
2755   /* use = ubase - ratio * cbase + ratio * var.  */
2756   tree_to_aff_combination (ubase, common_type, aff);
2757   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2758   tree_to_aff_combination (var, uutype, &var_aff);
2759
2760   /* We need to shift the value if we are after the increment.  */
2761   if (stmt_after_increment (loop, cand, at))
2762     {
2763       aff_tree cstep_aff;
2764
2765       if (common_type != uutype)
2766         cstep_common = fold_convert (common_type, cstep);
2767       else
2768         cstep_common = cstep;
2769
2770       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2771       aff_combination_add (&cbase_aff, &cstep_aff);
2772     }
2773
2774   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2775   aff_combination_add (aff, &cbase_aff);
2776   if (common_type != uutype)
2777     aff_combination_convert (aff, uutype);
2778
2779   aff_combination_scale (&var_aff, rat);
2780   aff_combination_add (aff, &var_aff);
2781
2782   return true;
2783 }
2784
2785 /* Determines the expression by that USE is expressed from induction variable
2786    CAND at statement AT in LOOP.  The computation is unshared.  */
2787
2788 static tree
2789 get_computation_at (struct loop *loop,
2790                     struct iv_use *use, struct iv_cand *cand, tree at)
2791 {
2792   aff_tree aff;
2793   tree type = TREE_TYPE (use->iv->base);
2794
2795   if (!get_computation_aff (loop, use, cand, at, &aff))
2796     return NULL_TREE;
2797   unshare_aff_combination (&aff);
2798   return fold_convert (type, aff_combination_to_tree (&aff));
2799 }
2800
2801 /* Determines the expression by that USE is expressed from induction variable
2802    CAND in LOOP.  The computation is unshared.  */
2803
2804 static tree
2805 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2806 {
2807   return get_computation_at (loop, use, cand, use->stmt);
2808 }
2809
2810 /* Returns cost of addition in MODE.  */
2811
2812 static unsigned
2813 add_cost (enum machine_mode mode)
2814 {
2815   static unsigned costs[NUM_MACHINE_MODES];
2816   rtx seq;
2817   unsigned cost;
2818
2819   if (costs[mode])
2820     return costs[mode];
2821
2822   start_sequence ();
2823   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2824                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2825                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2826                  NULL_RTX);
2827   seq = get_insns ();
2828   end_sequence ();
2829
2830   cost = seq_cost (seq);
2831   if (!cost)
2832     cost = 1;
2833
2834   costs[mode] = cost;
2835
2836   if (dump_file && (dump_flags & TDF_DETAILS))
2837     fprintf (dump_file, "Addition in %s costs %d\n",
2838              GET_MODE_NAME (mode), cost);
2839   return cost;
2840 }
2841
2842 /* Entry in a hashtable of already known costs for multiplication.  */
2843 struct mbc_entry
2844 {
2845   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2846   enum machine_mode mode;       /* In mode.  */
2847   unsigned cost;                /* The cost.  */
2848 };
2849
2850 /* Counts hash value for the ENTRY.  */
2851
2852 static hashval_t
2853 mbc_entry_hash (const void *entry)
2854 {
2855   const struct mbc_entry *e = (const struct mbc_entry *) entry;
2856
2857   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2858 }
2859
2860 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2861
2862 static int
2863 mbc_entry_eq (const void *entry1, const void *entry2)
2864 {
2865   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2866   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2867
2868   return (e1->mode == e2->mode
2869           && e1->cst == e2->cst);
2870 }
2871
2872 /* Returns cost of multiplication by constant CST in MODE.  */
2873
2874 unsigned
2875 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2876 {
2877   static htab_t costs;
2878   struct mbc_entry **cached, act;
2879   rtx seq;
2880   unsigned cost;
2881
2882   if (!costs)
2883     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2884
2885   act.mode = mode;
2886   act.cst = cst;
2887   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2888   if (*cached)
2889     return (*cached)->cost;
2890
2891   *cached = XNEW (struct mbc_entry);
2892   (*cached)->mode = mode;
2893   (*cached)->cst = cst;
2894
2895   start_sequence ();
2896   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2897                gen_int_mode (cst, mode), NULL_RTX, 0);
2898   seq = get_insns ();
2899   end_sequence ();
2900
2901   cost = seq_cost (seq);
2902
2903   if (dump_file && (dump_flags & TDF_DETAILS))
2904     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2905              (int) cst, GET_MODE_NAME (mode), cost);
2906
2907   (*cached)->cost = cost;
2908
2909   return cost;
2910 }
2911
2912 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2913    validity for a memory reference accessing memory of mode MODE.  */
2914
2915 bool
2916 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2917 {
2918 #define MAX_RATIO 128
2919   static sbitmap valid_mult[MAX_MACHINE_MODE];
2920
2921   if (!valid_mult[mode])
2922     {
2923       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2924       rtx addr;
2925       HOST_WIDE_INT i;
2926
2927       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2928       sbitmap_zero (valid_mult[mode]);
2929       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2930       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2931         {
2932           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2933           if (memory_address_p (mode, addr))
2934             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2935         }
2936
2937       if (dump_file && (dump_flags & TDF_DETAILS))
2938         {
2939           fprintf (dump_file, "  allowed multipliers:");
2940           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2941             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2942               fprintf (dump_file, " %d", (int) i);
2943           fprintf (dump_file, "\n");
2944           fprintf (dump_file, "\n");
2945         }
2946     }
2947
2948   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2949     return false;
2950
2951   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2952 }
2953
2954 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2955    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2956    variable is omitted.  Compute the cost for a memory reference that accesses
2957    a memory location of mode MEM_MODE.
2958
2959    TODO -- there must be some better way.  This all is quite crude.  */
2960
2961 static comp_cost
2962 get_address_cost (bool symbol_present, bool var_present,
2963                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2964                   enum machine_mode mem_mode)
2965 {
2966   static bool initialized[MAX_MACHINE_MODE];
2967   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2968   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2969   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2970   unsigned cost, acost, complexity;
2971   bool offset_p, ratio_p;
2972   HOST_WIDE_INT s_offset;
2973   unsigned HOST_WIDE_INT mask;
2974   unsigned bits;
2975
2976   if (!initialized[mem_mode])
2977     {
2978       HOST_WIDE_INT i;
2979       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2980       int old_cse_not_expected;
2981       unsigned sym_p, var_p, off_p, rat_p, add_c;
2982       rtx seq, addr, base;
2983       rtx reg0, reg1;
2984
2985       initialized[mem_mode] = true;
2986
2987       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2988
2989       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2990       for (i = start; i <= 1 << 20; i <<= 1)
2991         {
2992           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2993           if (!memory_address_p (mem_mode, addr))
2994             break;
2995         }
2996       max_offset[mem_mode] = i == start ? 0 : i >> 1;
2997       off[mem_mode] = max_offset[mem_mode];
2998
2999       for (i = start; i <= 1 << 20; i <<= 1)
3000         {
3001           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3002           if (!memory_address_p (mem_mode, addr))
3003             break;
3004         }
3005       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3006
3007       if (dump_file && (dump_flags & TDF_DETAILS))
3008         {
3009           fprintf (dump_file, "get_address_cost:\n");
3010           fprintf (dump_file, "  min offset %s %d\n",
3011                    GET_MODE_NAME (mem_mode),
3012                    (int) min_offset[mem_mode]);
3013           fprintf (dump_file, "  max offset %s %d\n",
3014                    GET_MODE_NAME (mem_mode),
3015                    (int) max_offset[mem_mode]);
3016         }
3017
3018       rat[mem_mode] = 1;
3019       for (i = 2; i <= MAX_RATIO; i++)
3020         if (multiplier_allowed_in_address_p (i, mem_mode))
3021           {
3022             rat[mem_mode] = i;
3023             break;
3024           }
3025
3026       /* Compute the cost of various addressing modes.  */
3027       acost = 0;
3028       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3029       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3030
3031       for (i = 0; i < 16; i++)
3032         {
3033           sym_p = i & 1;
3034           var_p = (i >> 1) & 1;
3035           off_p = (i >> 2) & 1;
3036           rat_p = (i >> 3) & 1;
3037
3038           addr = reg0;
3039           if (rat_p)
3040             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3041                                    gen_int_mode (rat[mem_mode], Pmode));
3042
3043           if (var_p)
3044             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3045
3046           if (sym_p)
3047             {
3048               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3049               /* ??? We can run into trouble with some backends by presenting
3050                  it with symbols which havn't been properly passed through
3051                  targetm.encode_section_info.  By setting the local bit, we
3052                  enhance the probability of things working.  */
3053               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3054
3055               if (off_p)
3056                 base = gen_rtx_fmt_e (CONST, Pmode,
3057                                       gen_rtx_fmt_ee (PLUS, Pmode,
3058                                                       base,
3059                                                       gen_int_mode (off[mem_mode],
3060                                                                     Pmode)));
3061             }
3062           else if (off_p)
3063             base = gen_int_mode (off[mem_mode], Pmode);
3064           else
3065             base = NULL_RTX;
3066
3067           if (base)
3068             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3069
3070           start_sequence ();
3071           /* To avoid splitting addressing modes, pretend that no cse will
3072              follow.  */
3073           old_cse_not_expected = cse_not_expected;
3074           cse_not_expected = true;
3075           addr = memory_address (mem_mode, addr);
3076           cse_not_expected = old_cse_not_expected;
3077           seq = get_insns ();
3078           end_sequence ();
3079
3080           acost = seq_cost (seq);
3081           acost += address_cost (addr, mem_mode);
3082
3083           if (!acost)
3084             acost = 1;
3085           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3086         }
3087
3088       /* On some targets, it is quite expensive to load symbol to a register,
3089          which makes addresses that contain symbols look much more expensive.
3090          However, the symbol will have to be loaded in any case before the
3091          loop (and quite likely we have it in register already), so it does not
3092          make much sense to penalize them too heavily.  So make some final
3093          tweaks for the SYMBOL_PRESENT modes:
3094
3095          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3096          var is cheaper, use this mode with small penalty.
3097          If VAR_PRESENT is true, try whether the mode with
3098          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3099          if this is the case, use it.  */
3100       add_c = add_cost (Pmode);
3101       for (i = 0; i < 8; i++)
3102         {
3103           var_p = i & 1;
3104           off_p = (i >> 1) & 1;
3105           rat_p = (i >> 2) & 1;
3106
3107           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3108           if (var_p)
3109             acost += add_c;
3110
3111           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3112             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3113         }
3114
3115       if (dump_file && (dump_flags & TDF_DETAILS))
3116         {
3117           fprintf (dump_file, "Address costs:\n");
3118
3119           for (i = 0; i < 16; i++)
3120             {
3121               sym_p = i & 1;
3122               var_p = (i >> 1) & 1;
3123               off_p = (i >> 2) & 1;
3124               rat_p = (i >> 3) & 1;
3125
3126               fprintf (dump_file, "  ");
3127               if (sym_p)
3128                 fprintf (dump_file, "sym + ");
3129               if (var_p)
3130                 fprintf (dump_file, "var + ");
3131               if (off_p)
3132                 fprintf (dump_file, "cst + ");
3133               if (rat_p)
3134                 fprintf (dump_file, "rat * ");
3135
3136               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3137               fprintf (dump_file, "index costs %d\n", acost);
3138             }
3139           fprintf (dump_file, "\n");
3140         }
3141     }
3142
3143   bits = GET_MODE_BITSIZE (Pmode);
3144   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3145   offset &= mask;
3146   if ((offset >> (bits - 1) & 1))
3147     offset |= ~mask;
3148   s_offset = offset;
3149
3150   cost = 0;
3151   offset_p = (s_offset != 0
3152               && min_offset[mem_mode] <= s_offset
3153               && s_offset <= max_offset[mem_mode]);
3154   ratio_p = (ratio != 1
3155              && multiplier_allowed_in_address_p (ratio, mem_mode));
3156
3157   if (ratio != 1 && !ratio_p)
3158     cost += multiply_by_cost (ratio, Pmode);
3159
3160   if (s_offset && !offset_p && !symbol_present)
3161     cost += add_cost (Pmode);
3162
3163   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3164   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3165   return new_cost (cost + acost, complexity);
3166 }
3167
3168 /* Estimates cost of forcing expression EXPR into a variable.  */
3169
3170 static comp_cost
3171 force_expr_to_var_cost (tree expr)
3172 {
3173   static bool costs_initialized = false;
3174   static unsigned integer_cost;
3175   static unsigned symbol_cost;
3176   static unsigned address_cost;
3177   tree op0, op1;
3178   comp_cost cost0, cost1, cost;
3179   enum machine_mode mode;
3180
3181   if (!costs_initialized)
3182     {
3183       tree type = build_pointer_type (integer_type_node);
3184       tree var, addr;
3185       rtx x;
3186
3187       var = create_tmp_var_raw (integer_type_node, "test_var");
3188       TREE_STATIC (var) = 1;
3189       x = produce_memory_decl_rtl (var, NULL);
3190       SET_DECL_RTL (var, x);
3191
3192       integer_cost = computation_cost (build_int_cst (integer_type_node,
3193                                                       2000));
3194
3195       addr = build1 (ADDR_EXPR, type, var);
3196       symbol_cost = computation_cost (addr) + 1;
3197
3198       address_cost
3199         = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3200                                     addr,
3201                                     build_int_cst (sizetype, 2000))) + 1;
3202       if (dump_file && (dump_flags & TDF_DETAILS))
3203         {
3204           fprintf (dump_file, "force_expr_to_var_cost:\n");
3205           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3206           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3207           fprintf (dump_file, "  address %d\n", (int) address_cost);
3208           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3209           fprintf (dump_file, "\n");
3210         }
3211
3212       costs_initialized = true;
3213     }
3214
3215   STRIP_NOPS (expr);
3216
3217   if (SSA_VAR_P (expr))
3218     return zero_cost;
3219
3220   if (is_gimple_min_invariant (expr))
3221     {
3222       if (TREE_CODE (expr) == INTEGER_CST)
3223         return new_cost (integer_cost, 0);
3224
3225       if (TREE_CODE (expr) == ADDR_EXPR)
3226         {
3227           tree obj = TREE_OPERAND (expr, 0);
3228
3229           if (TREE_CODE (obj) == VAR_DECL
3230               || TREE_CODE (obj) == PARM_DECL
3231               || TREE_CODE (obj) == RESULT_DECL)
3232             return new_cost (symbol_cost, 0);
3233         }
3234
3235       return new_cost (address_cost, 0);
3236     }
3237
3238   switch (TREE_CODE (expr))
3239     {
3240     case POINTER_PLUS_EXPR:
3241     case PLUS_EXPR:
3242     case MINUS_EXPR:
3243     case MULT_EXPR:
3244       op0 = TREE_OPERAND (expr, 0);
3245       op1 = TREE_OPERAND (expr, 1);
3246       STRIP_NOPS (op0);
3247       STRIP_NOPS (op1);
3248
3249       if (is_gimple_val (op0))
3250         cost0 = zero_cost;
3251       else
3252         cost0 = force_expr_to_var_cost (op0);
3253
3254       if (is_gimple_val (op1))
3255         cost1 = zero_cost;
3256       else
3257         cost1 = force_expr_to_var_cost (op1);
3258
3259       break;
3260
3261     default:
3262       /* Just an arbitrary value, FIXME.  */
3263       return new_cost (target_spill_cost, 0);
3264     }
3265
3266   mode = TYPE_MODE (TREE_TYPE (expr));
3267   switch (TREE_CODE (expr))
3268     {
3269     case POINTER_PLUS_EXPR:
3270     case PLUS_EXPR:
3271     case MINUS_EXPR:
3272       cost = new_cost (add_cost (mode), 0);
3273       break;
3274
3275     case MULT_EXPR:
3276       if (cst_and_fits_in_hwi (op0))
3277         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3278       else if (cst_and_fits_in_hwi (op1))
3279         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3280       else
3281         return new_cost (target_spill_cost, 0);
3282       break;
3283
3284     default:
3285       gcc_unreachable ();
3286     }
3287
3288   cost = add_costs (cost, cost0);
3289   cost = add_costs (cost, cost1);
3290
3291   /* Bound the cost by target_spill_cost.  The parts of complicated
3292      computations often are either loop invariant or at least can
3293      be shared between several iv uses, so letting this grow without
3294      limits would not give reasonable results.  */
3295   if (cost.cost > target_spill_cost)
3296     cost.cost = target_spill_cost;
3297
3298   return cost;
3299 }
3300
3301 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3302    invariants the computation depends on.  */
3303
3304 static comp_cost
3305 force_var_cost (struct ivopts_data *data,
3306                 tree expr, bitmap *depends_on)
3307 {
3308   if (depends_on)
3309     {
3310       fd_ivopts_data = data;
3311       walk_tree (&expr, find_depends, depends_on, NULL);
3312     }
3313
3314   return force_expr_to_var_cost (expr);
3315 }
3316
3317 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3318    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3319    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3320    invariants the computation depends on.  */
3321
3322 static comp_cost
3323 split_address_cost (struct ivopts_data *data,
3324                     tree addr, bool *symbol_present, bool *var_present,
3325                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3326 {
3327   tree core;
3328   HOST_WIDE_INT bitsize;
3329   HOST_WIDE_INT bitpos;
3330   tree toffset;
3331   enum machine_mode mode;
3332   int unsignedp, volatilep;
3333
3334   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3335                               &unsignedp, &volatilep, false);
3336
3337   if (toffset != 0
3338       || bitpos % BITS_PER_UNIT != 0
3339       || TREE_CODE (core) != VAR_DECL)
3340     {
3341       *symbol_present = false;
3342       *var_present = true;
3343       fd_ivopts_data = data;
3344       walk_tree (&addr, find_depends, depends_on, NULL);
3345       return new_cost (target_spill_cost, 0);
3346     }
3347
3348   *offset += bitpos / BITS_PER_UNIT;
3349   if (TREE_STATIC (core)
3350       || DECL_EXTERNAL (core))
3351     {
3352       *symbol_present = true;
3353       *var_present = false;
3354       return zero_cost;
3355     }
3356
3357   *symbol_present = false;
3358   *var_present = true;
3359   return zero_cost;
3360 }
3361
3362 /* Estimates cost of expressing difference of addresses E1 - E2 as
3363    var + symbol + offset.  The value of offset is added to OFFSET,
3364    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3365    part is missing.  DEPENDS_ON is a set of the invariants the computation
3366    depends on.  */
3367
3368 static comp_cost
3369 ptr_difference_cost (struct ivopts_data *data,
3370                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3371                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3372 {
3373   HOST_WIDE_INT diff = 0;
3374   comp_cost cost;
3375
3376   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3377
3378   if (ptr_difference_const (e1, e2, &diff))
3379     {
3380       *offset += diff;
3381       *symbol_present = false;
3382       *var_present = false;
3383       return zero_cost;
3384     }
3385
3386   if (integer_zerop (e2))
3387     return split_address_cost (data, TREE_OPERAND (e1, 0),
3388                                symbol_present, var_present, offset, depends_on);
3389
3390   *symbol_present = false;
3391   *var_present = true;
3392
3393   cost = force_var_cost (data, e1, depends_on);
3394   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3395   cost.cost += add_cost (Pmode);
3396
3397   return cost;
3398 }
3399
3400 /* Estimates cost of expressing difference E1 - E2 as
3401    var + symbol + offset.  The value of offset is added to OFFSET,
3402    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3403    part is missing.  DEPENDS_ON is a set of the invariants the computation
3404    depends on.  */
3405
3406 static comp_cost
3407 difference_cost (struct ivopts_data *data,
3408                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3409                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3410 {
3411   comp_cost cost;
3412   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3413   unsigned HOST_WIDE_INT off1, off2;
3414
3415   e1 = strip_offset (e1, &off1);
3416   e2 = strip_offset (e2, &off2);
3417   *offset += off1 - off2;
3418
3419   STRIP_NOPS (e1);
3420   STRIP_NOPS (e2);
3421
3422   if (TREE_CODE (e1) == ADDR_EXPR)
3423     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3424                                 depends_on);
3425   *symbol_present = false;
3426
3427   if (operand_equal_p (e1, e2, 0))
3428     {
3429       *var_present = false;
3430       return zero_cost;
3431     }
3432   *var_present = true;
3433   if (integer_zerop (e2))
3434     return force_var_cost (data, e1, depends_on);
3435
3436   if (integer_zerop (e1))
3437     {
3438       cost = force_var_cost (data, e2, depends_on);
3439       cost.cost += multiply_by_cost (-1, mode);
3440
3441       return cost;
3442     }
3443
3444   cost = force_var_cost (data, e1, depends_on);
3445   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3446   cost.cost += add_cost (mode);
3447
3448   return cost;
3449 }
3450
3451 /* Determines the cost of the computation by that USE is expressed
3452    from induction variable CAND.  If ADDRESS_P is true, we just need
3453    to create an address from it, otherwise we want to get it into
3454    register.  A set of invariants we depend on is stored in
3455    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3456
3457 static comp_cost
3458 get_computation_cost_at (struct ivopts_data *data,
3459                          struct iv_use *use, struct iv_cand *cand,
3460                          bool address_p, bitmap *depends_on, tree at)
3461 {
3462   tree ubase = use->iv->base, ustep = use->iv->step;
3463   tree cbase, cstep;
3464   tree utype = TREE_TYPE (ubase), ctype;
3465   unsigned HOST_WIDE_INT cstepi, offset = 0;
3466   HOST_WIDE_INT ratio, aratio;
3467   bool var_present, symbol_present;
3468   comp_cost cost;
3469   unsigned n_sums;
3470   double_int rat;
3471
3472   *depends_on = NULL;
3473
3474   /* Only consider real candidates.  */
3475   if (!cand->iv)
3476     return infinite_cost;
3477
3478   cbase = cand->iv->base;
3479   cstep = cand->iv->step;
3480   ctype = TREE_TYPE (cbase);
3481
3482   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3483     {
3484       /* We do not have a precision to express the values of use.  */
3485       return infinite_cost;
3486     }
3487
3488   if (address_p)
3489     {
3490       /* Do not try to express address of an object with computation based
3491          on address of a different object.  This may cause problems in rtl
3492          level alias analysis (that does not expect this to be happening,
3493          as this is illegal in C), and would be unlikely to be useful
3494          anyway.  */
3495       if (use->iv->base_object
3496           && cand->iv->base_object
3497           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3498         return infinite_cost;
3499     }
3500
3501   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3502     {
3503       /* TODO -- add direct handling of this case.  */
3504       goto fallback;
3505     }
3506
3507   /* CSTEPI is removed from the offset in case statement is after the
3508      increment.  If the step is not constant, we use zero instead.
3509      This is a bit imprecise (there is the extra addition), but
3510      redundancy elimination is likely to transform the code so that
3511      it uses value of the variable before increment anyway,
3512      so it is not that much unrealistic.  */
3513   if (cst_and_fits_in_hwi (cstep))
3514     cstepi = int_cst_value (cstep);
3515   else
3516     cstepi = 0;
3517
3518   if (!constant_multiple_of (ustep, cstep, &rat))
3519     return infinite_cost;
3520
3521   if (double_int_fits_in_shwi_p (rat))
3522     ratio = double_int_to_shwi (rat);
3523   else
3524     return infinite_cost;
3525
3526   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3527      or ratio == 1, it is better to handle this like
3528
3529      ubase - ratio * cbase + ratio * var
3530
3531      (also holds in the case ratio == -1, TODO.  */
3532
3533   if (cst_and_fits_in_hwi (cbase))
3534     {
3535       offset = - ratio * int_cst_value (cbase);
3536       cost = difference_cost (data,
3537                               ubase, build_int_cst (utype, 0),
3538                               &symbol_present, &var_present, &offset,
3539                               depends_on);
3540     }
3541   else if (ratio == 1)
3542     {
3543       cost = difference_cost (data,
3544                               ubase, cbase,
3545                               &symbol_present, &var_present, &offset,
3546                               depends_on);
3547     }
3548   else
3549     {
3550       cost = force_var_cost (data, cbase, depends_on);
3551       cost.cost += add_cost (TYPE_MODE (ctype));
3552       cost = add_costs (cost,
3553                         difference_cost (data,
3554                                          ubase, build_int_cst (utype, 0),
3555                                          &symbol_present, &var_present,
3556                                          &offset, depends_on));
3557     }
3558
3559   /* If we are after the increment, the value of the candidate is higher by
3560      one iteration.  */
3561   if (stmt_after_increment (data->current_loop, cand, at))
3562     offset -= ratio * cstepi;
3563
3564   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3565      (symbol/var/const parts may be omitted).  If we are looking for an address,
3566      find the cost of addressing this.  */
3567   if (address_p)
3568     return add_costs (cost, get_address_cost (symbol_present, var_present,
3569                                 offset, ratio,
3570                                 TYPE_MODE (TREE_TYPE (*use->op_p))));
3571
3572   /* Otherwise estimate the costs for computing the expression.  */
3573   aratio = ratio > 0 ? ratio : -ratio;
3574   if (!symbol_present && !var_present && !offset)
3575     {
3576       if (ratio != 1)
3577         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3578
3579       return cost;
3580     }
3581
3582   if (aratio != 1)
3583     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3584
3585   n_sums = 1;
3586   if (var_present
3587       /* Symbol + offset should be compile-time computable.  */
3588       && (symbol_present || offset))
3589     n_sums++;
3590
3591   /* Having offset does not affect runtime cost in case it is added to
3592      symbol, but it increases complexity.  */
3593   if (offset)
3594     cost.complexity++;
3595
3596   cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3597   return cost;
3598
3599 fallback:
3600   {
3601     /* Just get the expression, expand it and measure the cost.  */
3602     tree comp = get_computation_at (data->current_loop, use, cand, at);
3603
3604     if (!comp)
3605       return infinite_cost;
3606
3607     if (address_p)
3608       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3609
3610     return new_cost (computation_cost (comp), 0);
3611   }
3612 }
3613
3614 /* Determines the cost of the computation by that USE is expressed
3615    from induction variable CAND.  If ADDRESS_P is true, we just need
3616    to create an address from it, otherwise we want to get it into
3617    register.  A set of invariants we depend on is stored in
3618    DEPENDS_ON.  */
3619
3620 static comp_cost
3621 get_computation_cost (struct ivopts_data *data,
3622                       struct iv_use *use, struct iv_cand *cand,
3623                       bool address_p, bitmap *depends_on)
3624 {
3625   return get_computation_cost_at (data,
3626                                   use, cand, address_p, depends_on, use->stmt);
3627 }
3628
3629 /* Determines cost of basing replacement of USE on CAND in a generic
3630    expression.  */
3631
3632 static bool
3633 determine_use_iv_cost_generic (struct ivopts_data *data,
3634                                struct iv_use *use, struct iv_cand *cand)
3635 {
3636   bitmap depends_on;
3637   comp_cost cost;
3638
3639   /* The simple case first -- if we need to express value of the preserved
3640      original biv, the cost is 0.  This also prevents us from counting the
3641      cost of increment twice -- once at this use and once in the cost of
3642      the candidate.  */
3643   if (cand->pos == IP_ORIGINAL
3644       && cand->incremented_at == use->stmt)
3645     {
3646       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3647       return true;
3648     }
3649
3650   cost = get_computation_cost (data, use, cand, false, &depends_on);
3651   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3652
3653   return !infinite_cost_p (cost);
3654 }
3655
3656 /* Determines cost of basing replacement of USE on CAND in an address.  */
3657
3658 static bool
3659 determine_use_iv_cost_address (struct ivopts_data *data,
3660                                struct iv_use *use, struct iv_cand *cand)
3661 {
3662   bitmap depends_on;
3663   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3664
3665   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3666
3667   return !infinite_cost_p (cost);
3668 }
3669
3670 /* Computes value of candidate CAND at position AT in iteration NITER, and
3671    stores it to VAL.  */
3672
3673 static void
3674 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3675                aff_tree *val)
3676 {
3677   aff_tree step, delta, nit;
3678   struct iv *iv = cand->iv;
3679   tree type = TREE_TYPE (iv->base);
3680   tree steptype = type;
3681   if (POINTER_TYPE_P (type))
3682     steptype = sizetype;
3683
3684   tree_to_aff_combination (iv->step, steptype, &step);
3685   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3686   aff_combination_convert (&nit, steptype);
3687   aff_combination_mult (&nit, &step, &delta);
3688   if (stmt_after_increment (loop, cand, at))
3689     aff_combination_add (&delta, &step);
3690
3691   tree_to_aff_combination (iv->base, type, val);
3692   aff_combination_add (val, &delta);
3693 }
3694
3695 /* Returns period of induction variable iv.  */
3696
3697 static tree
3698 iv_period (struct iv *iv)
3699 {
3700   tree step = iv->step, period, type;
3701   tree pow2div;
3702
3703   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3704
3705   /* Period of the iv is gcd (step, type range).  Since type range is power
3706      of two, it suffices to determine the maximum power of two that divides
3707      step.  */
3708   pow2div = num_ending_zeros (step);
3709   type = unsigned_type_for (TREE_TYPE (step));
3710
3711   period = build_low_bits_mask (type,
3712                                 (TYPE_PRECISION (type)
3713                                  - tree_low_cst (pow2div, 1)));
3714
3715   return period;
3716 }
3717
3718 /* Returns the comparison operator used when eliminating the iv USE.  */
3719
3720 static enum tree_code
3721 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3722 {
3723   struct loop *loop = data->current_loop;
3724   basic_block ex_bb;
3725   edge exit;
3726
3727   ex_bb = bb_for_stmt (use->stmt);
3728   exit = EDGE_SUCC (ex_bb, 0);
3729   if (flow_bb_inside_loop_p (loop, exit->dest))
3730     exit = EDGE_SUCC (ex_bb, 1);
3731
3732   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3733 }
3734
3735 /* Check whether it is possible to express the condition in USE by comparison
3736    of candidate CAND.  If so, store the value compared with to BOUND.  */
3737
3738 static bool
3739 may_eliminate_iv (struct ivopts_data *data,
3740                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3741 {
3742   basic_block ex_bb;
3743   edge exit;
3744   tree nit, period;
3745   struct loop *loop = data->current_loop;
3746   aff_tree bnd;
3747   double_int period_value, max_niter;
3748
3749   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3750     return false;
3751
3752   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3753      for other conditions inside loop body.  */
3754   ex_bb = bb_for_stmt (use->stmt);
3755   if (use->stmt != last_stmt (ex_bb)
3756       || TREE_CODE (use->stmt) != COND_EXPR)
3757     return false;
3758   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3759     return false;
3760
3761   exit = EDGE_SUCC (ex_bb, 0);
3762   if (flow_bb_inside_loop_p (loop, exit->dest))
3763     exit = EDGE_SUCC (ex_bb, 1);
3764   if (flow_bb_inside_loop_p (loop, exit->dest))
3765     return false;
3766
3767   nit = niter_for_exit (data, exit);
3768   if (!nit)
3769     return false;
3770
3771   /* Determine whether we may use the variable to test whether niter iterations
3772      elapsed.  This is the case iff the period of the induction variable is
3773      greater than the number of iterations.  */
3774   period = iv_period (cand->iv);
3775   if (!period)
3776     return false;
3777
3778   /* Compare the period with the estimate on the number of iterations of the
3779      loop.  */
3780   if (!estimated_loop_iterations (loop, true, &max_niter))
3781     return false;
3782   period_value = tree_to_double_int (period);
3783   if (double_int_ucmp (period_value, max_niter) <= 0)
3784     return false;
3785
3786   cand_value_at (loop, cand, use->stmt, nit, &bnd);
3787   *bound = aff_combination_to_tree (&bnd);
3788   return true;
3789 }
3790
3791 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3792
3793 static bool
3794 determine_use_iv_cost_condition (struct ivopts_data *data,
3795                                  struct iv_use *use, struct iv_cand *cand)
3796 {
3797   tree bound = NULL_TREE;
3798   struct iv *cmp_iv;
3799   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3800   comp_cost elim_cost, express_cost, cost;
3801   bool ok;
3802
3803   /* Only consider real candidates.  */
3804   if (!cand->iv)
3805     {
3806       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3807       return false;
3808     }
3809
3810   /* Try iv elimination.  */
3811   if (may_eliminate_iv (data, use, cand, &bound))
3812     {
3813       elim_cost = force_var_cost (data, bound, &depends_on_elim);
3814       /* The bound is a loop invariant, so it will be only computed
3815          once.  */
3816       elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3817     }
3818   else
3819     elim_cost = infinite_cost;
3820
3821   /* Try expressing the original giv.  If it is compared with an invariant,
3822      note that we cannot get rid of it.  */
3823   ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3824   gcc_assert (ok);
3825
3826   express_cost = get_computation_cost (data, use, cand, false,
3827                                        &depends_on_express);
3828   fd_ivopts_data = data;
3829   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3830
3831   /* Choose the better approach.  */
3832   if (compare_costs (elim_cost, express_cost) < 0)
3833     {
3834       cost = elim_cost;
3835       depends_on = depends_on_elim;
3836       depends_on_elim = NULL;
3837     }
3838   else
3839     {
3840       cost = express_cost;
3841       depends_on = depends_on_express;
3842       depends_on_express = NULL;
3843       bound = NULL_TREE;
3844     }
3845
3846   set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3847
3848   if (depends_on_elim)
3849     BITMAP_FREE (depends_on_elim);
3850   if (depends_on_express)
3851     BITMAP_FREE (depends_on_express);
3852
3853   return !infinite_cost_p (cost);
3854 }
3855
3856 /* Determines cost of basing replacement of USE on CAND.  Returns false
3857    if USE cannot be based on CAND.  */
3858
3859 static bool
3860 determine_use_iv_cost (struct ivopts_data *data,
3861                        struct iv_use *use, struct iv_cand *cand)
3862 {
3863   switch (use->type)
3864     {
3865     case USE_NONLINEAR_EXPR:
3866       return determine_use_iv_cost_generic (data, use, cand);
3867
3868     case USE_ADDRESS:
3869       return determine_use_iv_cost_address (data, use, cand);
3870
3871     case USE_COMPARE:
3872       return determine_use_iv_cost_condition (data, use, cand);
3873
3874     default:
3875       gcc_unreachable ();
3876     }
3877 }
3878
3879 /* Determines costs of basing the use of the iv on an iv candidate.  */
3880
3881 static void
3882 determine_use_iv_costs (struct ivopts_data *data)
3883 {
3884   unsigned i, j;
3885   struct iv_use *use;
3886   struct iv_cand *cand;
3887   bitmap to_clear = BITMAP_ALLOC (NULL);
3888
3889   alloc_use_cost_map (data);
3890
3891   for (i = 0; i < n_iv_uses (data); i++)
3892     {
3893       use = iv_use (data, i);
3894
3895       if (data->consider_all_candidates)
3896         {
3897           for (j = 0; j < n_iv_cands (data); j++)
3898             {
3899               cand = iv_cand (data, j);
3900               determine_use_iv_cost (data, use, cand);
3901             }
3902         }
3903       else
3904         {
3905           bitmap_iterator bi;
3906
3907           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3908             {
3909               cand = iv_cand (data, j);
3910               if (!determine_use_iv_cost (data, use, cand))
3911                 bitmap_set_bit (to_clear, j);
3912             }
3913
3914           /* Remove the candidates for that the cost is infinite from
3915              the list of related candidates.  */
3916           bitmap_and_compl_into (use->related_cands, to_clear);
3917           bitmap_clear (to_clear);
3918         }
3919     }
3920
3921   BITMAP_FREE (to_clear);
3922
3923   if (dump_file && (dump_flags & TDF_DETAILS))
3924     {
3925       fprintf (dump_file, "Use-candidate costs:\n");
3926
3927       for (i = 0; i < n_iv_uses (data); i++)
3928         {
3929           use = iv_use (data, i);
3930
3931           fprintf (dump_file, "Use %d:\n", i);
3932           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
3933           for (j = 0; j < use->n_map_members; j++)
3934             {
3935               if (!use->cost_map[j].cand
3936                   || infinite_cost_p (use->cost_map[j].cost))
3937                 continue;
3938
3939               fprintf (dump_file, "  %d\t%d\t%d\t",
3940                        use->cost_map[j].cand->id,
3941                        use->cost_map[j].cost.cost,
3942                        use->cost_map[j].cost.complexity);
3943               if (use->cost_map[j].depends_on)
3944                 bitmap_print (dump_file,
3945                               use->cost_map[j].depends_on, "","");
3946               fprintf (dump_file, "\n");
3947             }
3948
3949           fprintf (dump_file, "\n");
3950         }
3951       fprintf (dump_file, "\n");
3952     }
3953 }
3954
3955 /* Determines cost of the candidate CAND.  */
3956
3957 static void
3958 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3959 {
3960   comp_cost cost_base;
3961   unsigned cost, cost_step;
3962   tree base;
3963
3964   if (!cand->iv)
3965     {
3966       cand->cost = 0;
3967       return;
3968     }
3969
3970   /* There are two costs associated with the candidate -- its increment
3971      and its initialization.  The second is almost negligible for any loop
3972      that rolls enough, so we take it just very little into account.  */
3973
3974   base = cand->iv->base;
3975   cost_base = force_var_cost (data, base, NULL);
3976   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3977
3978   cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
3979
3980   /* Prefer the original ivs unless we may gain something by replacing it.
3981      The reason is to makee debugging simpler; so this is not relevant for
3982      artificial ivs created by other optimization passes.  */
3983   if (cand->pos != IP_ORIGINAL
3984       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3985     cost++;
3986
3987   /* Prefer not to insert statements into latch unless there are some
3988      already (so that we do not create unnecessary jumps).  */
3989   if (cand->pos == IP_END
3990       && empty_block_p (ip_end_pos (data->current_loop)))
3991     cost++;
3992
3993   cand->cost = cost;
3994 }
3995
3996 /* Determines costs of computation of the candidates.  */
3997
3998 static void
3999 determine_iv_costs (struct ivopts_data *data)
4000 {
4001   unsigned i;
4002
4003   if (dump_file && (dump_flags & TDF_DETAILS))
4004     {
4005       fprintf (dump_file, "Candidate costs:\n");
4006       fprintf (dump_file, "  cand\tcost\n");
4007     }
4008
4009   for (i = 0; i < n_iv_cands (data); i++)
4010     {
4011       struct iv_cand *cand = iv_cand (data, i);
4012
4013       determine_iv_cost (data, cand);
4014
4015       if (dump_file && (dump_flags & TDF_DETAILS))
4016         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4017     }
4018
4019   if (dump_file && (dump_flags & TDF_DETAILS))
4020     fprintf (dump_file, "\n");
4021 }
4022
4023 /* Calculates cost for having SIZE induction variables.  */
4024
4025 static unsigned
4026 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4027 {
4028   /* We add size to the cost, so that we prefer eliminating ivs
4029      if possible.  */
4030   return size + estimate_reg_pressure_cost (size, data->regs_used);
4031 }
4032
4033 /* For each size of the induction variable set determine the penalty.  */
4034
4035 static void
4036 determine_set_costs (struct ivopts_data *data)
4037 {
4038   unsigned j, n;
4039   tree phi, op;
4040   struct loop *loop = data->current_loop;
4041   bitmap_iterator bi;
4042
4043   /* We use the following model (definitely improvable, especially the
4044      cost function -- TODO):
4045
4046      We estimate the number of registers available (using MD data), name it A.
4047
4048      We estimate the number of registers used by the loop, name it U.  This
4049      number is obtained as the number of loop phi nodes (not counting virtual
4050      registers and bivs) + the number of variables from outside of the loop.
4051
4052      We set a reserve R (free regs that are used for temporary computations,
4053      etc.).  For now the reserve is a constant 3.
4054
4055      Let I be the number of induction variables.
4056
4057      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4058         make a lot of ivs without a reason).
4059      -- if A - R < U + I <= A, the cost is I * PRES_COST
4060      -- if U + I > A, the cost is I * PRES_COST and
4061         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4062
4063   if (dump_file && (dump_flags & TDF_DETAILS))
4064     {
4065       fprintf (dump_file, "Global costs:\n");
4066       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4067       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost);
4068       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4069     }
4070
4071   n = 0;
4072   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4073     {
4074       op = PHI_RESULT (phi);
4075
4076       if (!is_gimple_reg (op))
4077         continue;
4078
4079       if (get_iv (data, op))
4080         continue;
4081
4082       n++;
4083     }
4084
4085   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4086     {
4087       struct version_info *info = ver_info (data, j);
4088
4089       if (info->inv_id && info->has_nonlin_use)
4090         n++;
4091     }
4092
4093   data->regs_used = n;
4094   if (dump_file && (dump_flags & TDF_DETAILS))
4095     fprintf (dump_file, "  regs_used %d\n", n);
4096
4097   if (dump_file && (dump_flags & TDF_DETAILS))
4098     {
4099       fprintf (dump_file, "  cost for size:\n");
4100       fprintf (dump_file, "  ivs\tcost\n");
4101       for (j = 0; j <= 2 * target_avail_regs; j++)
4102         fprintf (dump_file, "  %d\t%d\n", j,
4103                  ivopts_global_cost_for_size (data, j));
4104       fprintf (dump_file, "\n");
4105     }
4106 }
4107
4108 /* Returns true if A is a cheaper cost pair than B.  */
4109
4110 static bool
4111 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4112 {
4113   int cmp;
4114
4115   if (!a)
4116     return false;
4117
4118   if (!b)
4119     return true;
4120
4121   cmp = compare_costs (a->cost, b->cost);
4122   if (cmp < 0)
4123     return true;
4124
4125   if (cmp > 0)
4126     return false;
4127
4128   /* In case the costs are the same, prefer the cheaper candidate.  */
4129   if (a->cand->cost < b->cand->cost)
4130     return true;
4131
4132   return false;
4133 }
4134
4135 /* Computes the cost field of IVS structure.  */
4136
4137 static void
4138 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4139 {
4140   comp_cost cost = ivs->cand_use_cost;
4141   cost.cost += ivs->cand_cost;
4142   cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4143
4144   ivs->cost = cost;
4145 }
4146
4147 /* Remove invariants in set INVS to set IVS.  */
4148
4149 static void
4150 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4151 {
4152   bitmap_iterator bi;
4153   unsigned iid;
4154
4155   if (!invs)
4156     return;
4157
4158   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4159     {
4160       ivs->n_invariant_uses[iid]--;
4161       if (ivs->n_invariant_uses[iid] == 0)
4162         ivs->n_regs--;
4163     }
4164 }
4165
4166 /* Set USE not to be expressed by any candidate in IVS.  */
4167
4168 static void
4169 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4170                  struct iv_use *use)
4171 {
4172   unsigned uid = use->id, cid;
4173   struct cost_pair *cp;
4174
4175   cp = ivs->cand_for_use[uid];
4176   if (!cp)
4177     return;
4178   cid = cp->cand->id;
4179
4180   ivs->bad_uses++;
4181   ivs->cand_for_use[uid] = NULL;
4182   ivs->n_cand_uses[cid]--;
4183
4184   if (ivs->n_cand_uses[cid] == 0)
4185     {
4186       bitmap_clear_bit (ivs->cands, cid);
4187       /* Do not count the pseudocandidates.  */
4188       if (cp->cand->iv)
4189         ivs->n_regs--;
4190       ivs->n_cands--;
4191       ivs->cand_cost -= cp->cand->cost;
4192
4193       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4194     }
4195
4196   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4197
4198   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4199   iv_ca_recount_cost (data, ivs);
4200 }
4201
4202 /* Add invariants in set INVS to set IVS.  */
4203
4204 static void
4205 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4206 {
4207   bitmap_iterator bi;
4208   unsigned iid;
4209
4210   if (!invs)
4211     return;
4212
4213   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4214     {
4215       ivs->n_invariant_uses[iid]++;
4216       if (ivs->n_invariant_uses[iid] == 1)
4217         ivs->n_regs++;
4218     }
4219 }
4220
4221 /* Set cost pair for USE in set IVS to CP.  */
4222
4223 static void
4224 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4225               struct iv_use *use, struct cost_pair *cp)
4226 {
4227   unsigned uid = use->id, cid;
4228
4229   if (ivs->cand_for_use[uid] == cp)
4230     return;
4231
4232   if (ivs->cand_for_use[uid])
4233     iv_ca_set_no_cp (data, ivs, use);
4234
4235   if (cp)
4236     {
4237       cid = cp->cand->id;
4238
4239       ivs->bad_uses--;
4240       ivs->cand_for_use[uid] = cp;
4241       ivs->n_cand_uses[cid]++;
4242       if (ivs->n_cand_uses[cid] == 1)
4243         {
4244           bitmap_set_bit (ivs->cands, cid);
4245           /* Do not count the pseudocandidates.  */
4246           if (cp->cand->iv)
4247             ivs->n_regs++;
4248           ivs->n_cands++;
4249           ivs->cand_cost += cp->cand->cost;
4250
4251           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4252         }
4253
4254       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4255       iv_ca_set_add_invariants (ivs, cp->depends_on);
4256       iv_ca_recount_cost (data, ivs);
4257     }
4258 }
4259
4260 /* Extend set IVS by expressing USE by some of the candidates in it
4261    if possible.  */
4262
4263 static void
4264 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4265                struct iv_use *use)
4266 {
4267   struct cost_pair *best_cp = NULL, *cp;
4268   bitmap_iterator bi;
4269   unsigned i;
4270
4271   gcc_assert (ivs->upto >= use->id);
4272
4273   if (ivs->upto == use->id)
4274     {
4275       ivs->upto++;
4276       ivs->bad_uses++;
4277     }
4278
4279   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4280     {
4281       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4282
4283       if (cheaper_cost_pair (cp, best_cp))
4284         best_cp = cp;
4285     }
4286
4287   iv_ca_set_cp (data, ivs, use, best_cp);
4288 }
4289
4290 /* Get cost for assignment IVS.  */
4291
4292 static comp_cost
4293 iv_ca_cost (struct iv_ca *ivs)
4294 {
4295   return (ivs->bad_uses ? infinite_cost : ivs->cost);
4296 }
4297
4298 /* Returns true if all dependences of CP are among invariants in IVS.  */
4299
4300 static bool
4301 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4302 {
4303   unsigned i;
4304   bitmap_iterator bi;
4305
4306   if (!cp->depends_on)
4307     return true;
4308
4309   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4310     {
4311       if (ivs->n_invariant_uses[i] == 0)
4312         return false;
4313     }
4314
4315   return true;
4316 }
4317
4318 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4319    it before NEXT_CHANGE.  */
4320
4321 static struct iv_ca_delta *
4322 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4323                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4324 {
4325   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4326
4327   change->use = use;
4328   change->old_cp = old_cp;
4329   change->new_cp = new_cp;
4330   change->next_change = next_change;
4331
4332   return change;
4333 }
4334
4335 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4336    are rewritten.  */
4337
4338 static struct iv_ca_delta *
4339 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4340 {
4341   struct iv_ca_delta *last;
4342
4343   if (!l2)
4344     return l1;
4345
4346   if (!l1)
4347     return l2;
4348
4349   for (last = l1; last->next_change; last = last->next_change)
4350     continue;
4351   last->next_change = l2;
4352
4353   return l1;
4354 }
4355
4356 /* Returns candidate by that USE is expressed in IVS.  */
4357
4358 static struct cost_pair *
4359 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4360 {
4361   return ivs->cand_for_use[use->id];
4362 }
4363
4364 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4365
4366 static struct iv_ca_delta *
4367 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4368 {
4369   struct iv_ca_delta *act, *next, *prev = NULL;
4370   struct cost_pair *tmp;
4371
4372   for (act = delta; act; act = next)
4373     {
4374       next = act->next_change;
4375       act->next_change = prev;
4376       prev = act;
4377
4378       tmp = act->old_cp;
4379       act->old_cp = act->new_cp;
4380       act->new_cp = tmp;
4381     }
4382
4383   return prev;
4384 }
4385
4386 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4387    reverted instead.  */
4388
4389 static void
4390 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4391                     struct iv_ca_delta *delta, bool forward)
4392 {
4393   struct cost_pair *from, *to;
4394   struct iv_ca_delta *act;
4395
4396   if (!forward)
4397     delta = iv_ca_delta_reverse (delta);
4398
4399   for (act = delta; act; act = act->next_change)
4400     {
4401       from = act->old_cp;
4402       to = act->new_cp;
4403       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4404       iv_ca_set_cp (data, ivs, act->use, to);
4405     }
4406
4407   if (!forward)
4408     iv_ca_delta_reverse (delta);
4409 }
4410
4411 /* Returns true if CAND is used in IVS.  */
4412
4413 static bool
4414 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4415 {
4416   return ivs->n_cand_uses[cand->id] > 0;
4417 }
4418
4419 /* Returns number of induction variable candidates in the set IVS.  */
4420
4421 static unsigned
4422 iv_ca_n_cands (struct iv_ca *ivs)
4423 {
4424   return ivs->n_cands;
4425 }
4426
4427 /* Free the list of changes DELTA.  */
4428
4429 static void
4430 iv_ca_delta_free (struct iv_ca_delta **delta)
4431 {
4432   struct iv_ca_delta *act, *next;
4433
4434   for (act = *delta; act; act = next)
4435     {
4436       next = act->next_change;
4437       free (act);
4438     }
4439
4440   *delta = NULL;
4441 }
4442
4443 /* Allocates new iv candidates assignment.  */
4444
4445 static struct iv_ca *
4446 iv_ca_new (struct ivopts_data *data)
4447 {
4448   struct iv_ca *nw = XNEW (struct iv_ca);
4449
4450   nw->upto = 0;
4451   nw->bad_uses = 0;
4452   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4453   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4454   nw->cands = BITMAP_ALLOC (NULL);
4455   nw->n_cands = 0;
4456   nw->n_regs = 0;
4457   nw->cand_use_cost = zero_cost;
4458   nw->cand_cost = 0;
4459   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4460   nw->cost = zero_cost;
4461
4462   return nw;
4463 }
4464
4465 /* Free memory occupied by the set IVS.  */
4466
4467 static void
4468 iv_ca_free (struct iv_ca **ivs)
4469 {
4470   free ((*ivs)->cand_for_use);
4471   free ((*ivs)->n_cand_uses);
4472   BITMAP_FREE ((*ivs)->cands);
4473   free ((*ivs)->n_invariant_uses);
4474   free (*ivs);
4475   *ivs = NULL;
4476 }
4477
4478 /* Dumps IVS to FILE.  */
4479
4480 static void
4481 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4482 {
4483   const char *pref = "  invariants ";
4484   unsigned i;
4485   comp_cost cost = iv_ca_cost (ivs);
4486
4487   fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
4488   bitmap_print (file, ivs->cands, "  candidates ","\n");
4489
4490   for (i = 1; i <= data->max_inv_id; i++)
4491     if (ivs->n_invariant_uses[i])
4492       {
4493         fprintf (file, "%s%d", pref, i);
4494         pref = ", ";
4495       }
4496   fprintf (file, "\n");
4497 }
4498
4499 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4500    new set, and store differences in DELTA.  Number of induction variables
4501    in the new set is stored to N_IVS.  */
4502
4503 static comp_cost
4504 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4505               struct iv_cand *cand, struct iv_ca_delta **delta,
4506               unsigned *n_ivs)
4507 {
4508   unsigned i;
4509   comp_cost cost;
4510   struct iv_use *use;
4511   struct cost_pair *old_cp, *new_cp;
4512
4513   *delta = NULL;
4514   for (i = 0; i < ivs->upto; i++)
4515     {
4516       use = iv_use (data, i);
4517       old_cp = iv_ca_cand_for_use (ivs, use);
4518
4519       if (old_cp
4520           && old_cp->cand == cand)
4521         continue;
4522
4523       new_cp = get_use_iv_cost (data, use, cand);
4524       if (!new_cp)
4525         continue;
4526
4527       if (!iv_ca_has_deps (ivs, new_cp))
4528         continue;
4529
4530       if (!cheaper_cost_pair (new_cp, old_cp))
4531         continue;
4532
4533       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4534     }
4535
4536   iv_ca_delta_commit (data, ivs, *delta, true);
4537   cost = iv_ca_cost (ivs);
4538   if (n_ivs)
4539     *n_ivs = iv_ca_n_cands (ivs);
4540   iv_ca_delta_commit (data, ivs, *delta, false);
4541
4542   return cost;
4543 }
4544
4545 /* Try narrowing set IVS by removing CAND.  Return the cost of
4546    the new set and store the differences in DELTA.  */
4547
4548 static comp_cost
4549 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4550               struct iv_cand *cand, struct iv_ca_delta **delta)
4551 {
4552   unsigned i, ci;
4553   struct iv_use *use;
4554   struct cost_pair *old_cp, *new_cp, *cp;
4555   bitmap_iterator bi;
4556   struct iv_cand *cnd;
4557   comp_cost cost;
4558
4559   *delta = NULL;
4560   for (i = 0; i < n_iv_uses (data); i++)
4561     {
4562       use = iv_use (data, i);
4563
4564       old_cp = iv_ca_cand_for_use (ivs, use);
4565       if (old_cp->cand != cand)
4566         continue;
4567
4568       new_cp = NULL;
4569
4570       if (data->consider_all_candidates)
4571         {
4572           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4573             {
4574               if (ci == cand->id)
4575                 continue;
4576
4577               cnd = iv_cand (data, ci);
4578
4579               cp = get_use_iv_cost (data, use, cnd);
4580               if (!cp)
4581                 continue;
4582               if (!iv_ca_has_deps (ivs, cp))
4583                 continue;
4584
4585               if (!cheaper_cost_pair (cp, new_cp))
4586                 continue;
4587
4588               new_cp = cp;
4589             }
4590         }
4591       else
4592         {
4593           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4594             {
4595               if (ci == cand->id)
4596                 continue;
4597
4598               cnd = iv_cand (data, ci);
4599
4600               cp = get_use_iv_cost (data, use, cnd);
4601               if (!cp)
4602                 continue;
4603               if (!iv_ca_has_deps (ivs, cp))
4604                 continue;
4605
4606               if (!cheaper_cost_pair (cp, new_cp))
4607                 continue;
4608
4609               new_cp = cp;
4610             }
4611         }
4612
4613       if (!new_cp)
4614         {
4615           iv_ca_delta_free (delta);
4616           return infinite_cost;
4617         }
4618
4619       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4620     }
4621
4622   iv_ca_delta_commit (data, ivs, *delta, true);
4623   cost = iv_ca_cost (ivs);
4624   iv_ca_delta_commit (data, ivs, *delta, false);
4625
4626   return cost;
4627 }
4628
4629 /* Try optimizing the set of candidates IVS by removing candidates different
4630    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4631    differences in DELTA.  */
4632
4633 static comp_cost
4634 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4635              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4636 {
4637   bitmap_iterator bi;
4638   struct iv_ca_delta *act_delta, *best_delta;
4639   unsigned i;
4640   comp_cost best_cost, acost;
4641   struct iv_cand *cand;
4642
4643   best_delta = NULL;
4644   best_cost = iv_ca_cost (ivs);
4645
4646   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4647     {
4648       cand = iv_cand (data, i);
4649
4650       if (cand == except_cand)
4651         continue;
4652
4653       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4654
4655       if (compare_costs (acost, best_cost) < 0)
4656         {
4657           best_cost = acost;
4658           iv_ca_delta_free (&best_delta);
4659           best_delta = act_delta;
4660         }
4661       else
4662         iv_ca_delta_free (&act_delta);
4663     }
4664
4665   if (!best_delta)
4666     {
4667       *delta = NULL;
4668       return best_cost;
4669     }
4670
4671   /* Recurse to possibly remove other unnecessary ivs.  */
4672   iv_ca_delta_commit (data, ivs, best_delta, true);
4673   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4674   iv_ca_delta_commit (data, ivs, best_delta, false);
4675   *delta = iv_ca_delta_join (best_delta, *delta);
4676   return best_cost;
4677 }
4678
4679 /* Tries to extend the sets IVS in the best possible way in order
4680    to express the USE.  */
4681
4682 static bool
4683 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4684                   struct iv_use *use)
4685 {
4686   comp_cost best_cost, act_cost;
4687   unsigned i;
4688   bitmap_iterator bi;
4689   struct iv_cand *cand;
4690   struct iv_ca_delta *best_delta = NULL, *act_delta;
4691   struct cost_pair *cp;
4692
4693   iv_ca_add_use (data, ivs, use);
4694   best_cost = iv_ca_cost (ivs);
4695
4696   cp = iv_ca_cand_for_use (ivs, use);
4697   if (cp)
4698     {
4699       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4700       iv_ca_set_no_cp (data, ivs, use);
4701     }
4702
4703   /* First try important candidates not based on any memory object.  Only if
4704      this fails, try the specific ones.  Rationale -- in loops with many
4705      variables the best choice often is to use just one generic biv.  If we
4706      added here many ivs specific to the uses, the optimization algorithm later
4707      would be likely to get stuck in a local minimum, thus causing us to create
4708      too many ivs.  The approach from few ivs to more seems more likely to be
4709      successful -- starting from few ivs, replacing an expensive use by a
4710      specific iv should always be a win.  */
4711   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4712     {
4713       cand = iv_cand (data, i);
4714
4715       if (cand->iv->base_object != NULL_TREE)
4716         continue;
4717
4718       if (iv_ca_cand_used_p (ivs, cand))
4719         continue;
4720
4721       cp = get_use_iv_cost (data, use, cand);
4722       if (!cp)
4723         continue;
4724
4725       iv_ca_set_cp (data, ivs, use, cp);
4726       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4727       iv_ca_set_no_cp (data, ivs, use);
4728       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4729
4730       if (compare_costs (act_cost, best_cost) < 0)
4731         {
4732           best_cost = act_cost;
4733
4734           iv_ca_delta_free (&best_delta);
4735           best_delta = act_delta;
4736         }
4737       else
4738         iv_ca_delta_free (&act_delta);
4739     }
4740
4741   if (infinite_cost_p (best_cost))
4742     {
4743       for (i = 0; i < use->n_map_members; i++)
4744         {
4745           cp = use->cost_map + i;
4746           cand = cp->cand;
4747           if (!cand)
4748             continue;
4749
4750           /* Already tried this.  */
4751           if (cand->important && cand->iv->base_object == NULL_TREE)
4752             continue;
4753
4754           if (iv_ca_cand_used_p (ivs, cand))
4755             continue;
4756
4757           act_delta = NULL;
4758           iv_ca_set_cp (data, ivs, use, cp);
4759           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4760           iv_ca_set_no_cp (data, ivs, use);
4761           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4762                                        cp, act_delta);
4763
4764           if (compare_costs (act_cost, best_cost) < 0)
4765             {
4766               best_cost = act_cost;
4767
4768               if (best_delta)
4769                 iv_ca_delta_free (&best_delta);
4770               best_delta = act_delta;
4771             }
4772           else
4773             iv_ca_delta_free (&act_delta);
4774         }
4775     }
4776
4777   iv_ca_delta_commit (data, ivs, best_delta, true);
4778   iv_ca_delta_free (&best_delta);
4779
4780   return !infinite_cost_p (best_cost);
4781 }
4782
4783 /* Finds an initial assignment of candidates to uses.  */
4784
4785 static struct iv_ca *
4786 get_initial_solution (struct ivopts_data *data)
4787 {
4788   struct iv_ca *ivs = iv_ca_new (data);
4789   unsigned i;
4790
4791   for (i = 0; i < n_iv_uses (data); i++)
4792     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4793       {
4794         iv_ca_free (&ivs);
4795         return NULL;
4796       }
4797
4798   return ivs;
4799 }
4800
4801 /* Tries to improve set of induction variables IVS.  */
4802
4803 static bool
4804 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4805 {
4806   unsigned i, n_ivs;
4807   comp_cost acost, best_cost = iv_ca_cost (ivs);
4808   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4809   struct iv_cand *cand;
4810
4811   /* Try extending the set of induction variables by one.  */
4812   for (i = 0; i < n_iv_cands (data); i++)
4813     {
4814       cand = iv_cand (data, i);
4815
4816       if (iv_ca_cand_used_p (ivs, cand))
4817         continue;
4818
4819       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4820       if (!act_delta)
4821         continue;
4822
4823       /* If we successfully added the candidate and the set is small enough,
4824          try optimizing it by removing other candidates.  */
4825       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4826         {
4827           iv_ca_delta_commit (data, ivs, act_delta, true);
4828           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4829           iv_ca_delta_commit (data, ivs, act_delta, false);
4830           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4831         }
4832
4833       if (compare_costs (acost, best_cost) < 0)
4834         {
4835           best_cost = acost;
4836           iv_ca_delta_free (&best_delta);
4837           best_delta = act_delta;
4838         }
4839       else
4840         iv_ca_delta_free (&act_delta);
4841     }
4842
4843   if (!best_delta)
4844     {
4845       /* Try removing the candidates from the set instead.  */
4846       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4847
4848       /* Nothing more we can do.  */
4849       if (!best_delta)
4850         return false;
4851     }
4852
4853   iv_ca_delta_commit (data, ivs, best_delta, true);
4854   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4855   iv_ca_delta_free (&best_delta);
4856   return true;
4857 }
4858
4859 /* Attempts to find the optimal set of induction variables.  We do simple
4860    greedy heuristic -- we try to replace at most one candidate in the selected
4861    solution and remove the unused ivs while this improves the cost.  */
4862
4863 static struct iv_ca *
4864 find_optimal_iv_set (struct ivopts_data *data)
4865 {
4866   unsigned i;
4867   struct iv_ca *set;
4868   struct iv_use *use;
4869
4870   /* Get the initial solution.  */
4871   set = get_initial_solution (data);
4872   if (!set)
4873     {
4874       if (dump_file && (dump_flags & TDF_DETAILS))
4875         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4876       return NULL;
4877     }
4878
4879   if (dump_file && (dump_flags & TDF_DETAILS))
4880     {
4881       fprintf (dump_file, "Initial set of candidates:\n");
4882       iv_ca_dump (data, dump_file, set);
4883     }
4884
4885   while (try_improve_iv_set (data, set))
4886     {
4887       if (dump_file && (dump_flags & TDF_DETAILS))
4888         {
4889           fprintf (dump_file, "Improved to:\n");
4890           iv_ca_dump (data, dump_file, set);
4891         }
4892     }
4893
4894   if (dump_file && (dump_flags & TDF_DETAILS))
4895     {
4896       comp_cost cost = iv_ca_cost (set);
4897       fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4898     }
4899
4900   for (i = 0; i < n_iv_uses (data); i++)
4901     {
4902       use = iv_use (data, i);
4903       use->selected = iv_ca_cand_for_use (set, use)->cand;
4904     }
4905
4906   return set;
4907 }
4908
4909 /* Creates a new induction variable corresponding to CAND.  */
4910
4911 static void
4912 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4913 {
4914   block_stmt_iterator incr_pos;
4915   tree base;
4916   bool after = false;
4917
4918   if (!cand->iv)
4919     return;
4920
4921   switch (cand->pos)
4922     {
4923     case IP_NORMAL:
4924       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4925       break;
4926
4927     case IP_END:
4928       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4929       after = true;
4930       break;
4931
4932     case IP_ORIGINAL:
4933       /* Mark that the iv is preserved.  */
4934       name_info (data, cand->var_before)->preserve_biv = true;
4935       name_info (data, cand->var_after)->preserve_biv = true;
4936
4937       /* Rewrite the increment so that it uses var_before directly.  */
4938       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4939
4940       return;
4941     }
4942
4943   gimple_add_tmp_var (cand->var_before);
4944   add_referenced_var (cand->var_before);
4945
4946   base = unshare_expr (cand->iv->base);
4947
4948   create_iv (base, unshare_expr (cand->iv->step),
4949              cand->var_before, data->current_loop,
4950              &incr_pos, after, &cand->var_before, &cand->var_after);
4951 }
4952
4953 /* Creates new induction variables described in SET.  */
4954
4955 static void
4956 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4957 {
4958   unsigned i;
4959   struct iv_cand *cand;
4960   bitmap_iterator bi;
4961
4962   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4963     {
4964       cand = iv_cand (data, i);
4965       create_new_iv (data, cand);
4966     }
4967 }
4968
4969 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4970    is true, remove also the ssa name defined by the statement.  */
4971
4972 static void
4973 remove_statement (tree stmt, bool including_defined_name)
4974 {
4975   if (TREE_CODE (stmt) == PHI_NODE)
4976     {
4977       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4978     }
4979   else
4980     {
4981       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4982
4983       bsi_remove (&bsi, true);
4984       release_defs (stmt);
4985     }
4986 }
4987
4988 /* Rewrites USE (definition of iv used in a nonlinear expression)
4989    using candidate CAND.  */
4990
4991 static void
4992 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4993                             struct iv_use *use, struct iv_cand *cand)
4994 {
4995   tree comp;
4996   tree op, tgt, ass;
4997   block_stmt_iterator bsi;
4998
4999   /* An important special case -- if we are asked to express value of
5000      the original iv by itself, just exit; there is no need to
5001      introduce a new computation (that might also need casting the
5002      variable to unsigned and back).  */
5003   if (cand->pos == IP_ORIGINAL
5004       && cand->incremented_at == use->stmt)
5005     {
5006       tree step, ctype, utype;
5007       enum tree_code incr_code = PLUS_EXPR;
5008
5009       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
5010       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
5011
5012       step = cand->iv->step;
5013       ctype = TREE_TYPE (step);
5014       utype = TREE_TYPE (cand->var_after);
5015       if (TREE_CODE (step) == NEGATE_EXPR)
5016         {
5017           incr_code = MINUS_EXPR;
5018           step = TREE_OPERAND (step, 0);
5019         }
5020
5021       /* Check whether we may leave the computation unchanged.
5022          This is the case only if it does not rely on other
5023          computations in the loop -- otherwise, the computation
5024          we rely upon may be removed in remove_unused_ivs,
5025          thus leading to ICE.  */
5026       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
5027       if (TREE_CODE (op) == PLUS_EXPR
5028           || TREE_CODE (op) == MINUS_EXPR
5029           || TREE_CODE (op) == POINTER_PLUS_EXPR)
5030         {
5031           if (TREE_OPERAND (op, 0) == cand->var_before)
5032             op = TREE_OPERAND (op, 1);
5033           else if (TREE_CODE (op) != MINUS_EXPR
5034                    && TREE_OPERAND (op, 1) == cand->var_before)
5035             op = TREE_OPERAND (op, 0);
5036           else
5037             op = NULL_TREE;
5038         }
5039       else
5040         op = NULL_TREE;
5041
5042       if (op
5043           && (TREE_CODE (op) == INTEGER_CST
5044               || operand_equal_p (op, step, 0)))
5045         return;
5046
5047       /* Otherwise, add the necessary computations to express
5048          the iv.  */
5049       op = fold_convert (ctype, cand->var_before);
5050       comp = fold_convert (utype,
5051                            build2 (incr_code, ctype, op,
5052                                    unshare_expr (step)));
5053     }
5054   else
5055     {
5056       comp = get_computation (data->current_loop, use, cand);
5057       gcc_assert (comp != NULL_TREE);
5058     }
5059
5060   switch (TREE_CODE (use->stmt))
5061     {
5062     case PHI_NODE:
5063       tgt = PHI_RESULT (use->stmt);
5064
5065       /* If we should keep the biv, do not replace it.  */
5066       if (name_info (data, tgt)->preserve_biv)
5067         return;
5068
5069       bsi = bsi_after_labels (bb_for_stmt (use->stmt));
5070       break;
5071
5072     case GIMPLE_MODIFY_STMT:
5073       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
5074       bsi = bsi_for_stmt (use->stmt);
5075       break;
5076
5077     default:
5078       gcc_unreachable ();
5079     }
5080
5081   op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5082                                  true, BSI_SAME_STMT);
5083
5084   if (TREE_CODE (use->stmt) == PHI_NODE)
5085     {
5086       ass = build_gimple_modify_stmt (tgt, op);
5087       bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
5088       remove_statement (use->stmt, false);
5089       SSA_NAME_DEF_STMT (tgt) = ass;
5090     }
5091   else
5092     GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
5093 }
5094
5095 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5096    for_each_index.  */
5097
5098 static bool
5099 idx_remove_ssa_names (tree base, tree *idx,
5100                       void *data ATTRIBUTE_UNUSED)
5101 {
5102   tree *op;
5103
5104   if (TREE_CODE (*idx) == SSA_NAME)
5105     *idx = SSA_NAME_VAR (*idx);
5106
5107   if (TREE_CODE (base) == ARRAY_REF)
5108     {
5109       op = &TREE_OPERAND (base, 2);
5110       if (*op
5111           && TREE_CODE (*op) == SSA_NAME)
5112         *op = SSA_NAME_VAR (*op);
5113       op = &TREE_OPERAND (base, 3);
5114       if (*op
5115           && TREE_CODE (*op) == SSA_NAME)
5116         *op = SSA_NAME_VAR (*op);
5117     }
5118
5119   return true;
5120 }
5121
5122 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5123
5124 static tree
5125 unshare_and_remove_ssa_names (tree ref)
5126 {
5127   ref = unshare_expr (ref);
5128   for_each_index (&ref, idx_remove_ssa_names, NULL);
5129
5130   return ref;
5131 }
5132
5133 /* Extract the alias analysis info for the memory reference REF.  There are
5134    several ways how this information may be stored and what precisely is
5135    its semantics depending on the type of the reference, but there always is
5136    somewhere hidden one _DECL node that is used to determine the set of
5137    virtual operands for the reference.  The code below deciphers this jungle
5138    and extracts this single useful piece of information.  */
5139
5140 static tree
5141 get_ref_tag (tree ref, tree orig)
5142 {
5143   tree var = get_base_address (ref);
5144   tree aref = NULL_TREE, tag, sv;
5145   HOST_WIDE_INT offset, size, maxsize;
5146
5147   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5148     {
5149       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5150       if (ref)
5151         break;
5152     }
5153
5154   if (!var)
5155     return NULL_TREE;
5156
5157   if (TREE_CODE (var) == INDIRECT_REF)
5158     {
5159       /* If the base is a dereference of a pointer, first check its name memory
5160          tag.  If it does not have one, use its symbol memory tag.  */
5161       var = TREE_OPERAND (var, 0);
5162       if (TREE_CODE (var) != SSA_NAME)
5163         return NULL_TREE;
5164
5165       if (SSA_NAME_PTR_INFO (var))
5166         {
5167           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5168           if (tag)
5169             return tag;
5170         }
5171
5172       var = SSA_NAME_VAR (var);
5173       tag = symbol_mem_tag (var);
5174       gcc_assert (tag != NULL_TREE);
5175       return tag;
5176     }
5177   else
5178     {
5179       if (!DECL_P (var))
5180         return NULL_TREE;
5181
5182       tag = symbol_mem_tag (var);
5183       if (tag)
5184         return tag;
5185
5186       return var;
5187     }
5188 }
5189
5190 /* Copies the reference information from OLD_REF to NEW_REF.  */
5191
5192 static void
5193 copy_ref_info (tree new_ref, tree old_ref)
5194 {
5195   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5196     copy_mem_ref_info (new_ref, old_ref);
5197   else
5198     {
5199       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5200       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5201     }
5202 }
5203
5204 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5205
5206 static void
5207 rewrite_use_address (struct ivopts_data *data,
5208                      struct iv_use *use, struct iv_cand *cand)
5209 {
5210   aff_tree aff;
5211   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5212   tree ref;
5213   bool ok;
5214
5215   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5216   gcc_assert (ok);
5217   unshare_aff_combination (&aff);
5218
5219   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5220   copy_ref_info (ref, *use->op_p);
5221   *use->op_p = ref;
5222 }
5223
5224 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5225    candidate CAND.  */
5226
5227 static void
5228 rewrite_use_compare (struct ivopts_data *data,
5229                      struct iv_use *use, struct iv_cand *cand)
5230 {
5231   tree comp, *var_p, op, bound;
5232   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5233   enum tree_code compare;
5234   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5235   bool ok;
5236
5237   bound = cp->value;
5238   if (bound)
5239     {
5240       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5241       tree var_type = TREE_TYPE (var);
5242
5243       compare = iv_elimination_compare (data, use);
5244       bound = unshare_expr (fold_convert (var_type, bound));
5245       op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
5246                                      true, BSI_SAME_STMT);
5247
5248       *use->op_p = build2 (compare, boolean_type_node, var, op);
5249       return;
5250     }
5251
5252   /* The induction variable elimination failed; just express the original
5253      giv.  */
5254   comp = get_computation (data->current_loop, use, cand);
5255   gcc_assert (comp != NULL_TREE);
5256
5257   ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5258   gcc_assert (ok);
5259
5260   *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5261                                      true, BSI_SAME_STMT);
5262 }
5263
5264 /* Rewrites USE using candidate CAND.  */
5265
5266 static void
5267 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5268 {
5269   push_stmt_changes (&use->stmt);
5270
5271   switch (use->type)
5272     {
5273       case USE_NONLINEAR_EXPR:
5274         rewrite_use_nonlinear_expr (data, use, cand);
5275         break;
5276
5277       case USE_ADDRESS:
5278         rewrite_use_address (data, use, cand);
5279         break;
5280
5281       case USE_COMPARE:
5282         rewrite_use_compare (data, use, cand);
5283         break;
5284
5285       default:
5286         gcc_unreachable ();
5287     }
5288
5289   pop_stmt_changes (&use->stmt);
5290 }
5291
5292 /* Rewrite the uses using the selected induction variables.  */
5293
5294 static void
5295 rewrite_uses (struct ivopts_data *data)
5296 {
5297   unsigned i;
5298   struct iv_cand *cand;
5299   struct iv_use *use;
5300
5301   for (i = 0; i < n_iv_uses (data); i++)
5302     {
5303       use = iv_use (data, i);
5304       cand = use->selected;
5305       gcc_assert (cand);
5306
5307       rewrite_use (data, use, cand);
5308     }
5309 }
5310
5311 /* Removes the ivs that are not used after rewriting.  */
5312
5313 static void
5314 remove_unused_ivs (struct ivopts_data *data)
5315 {
5316   unsigned j;
5317   bitmap_iterator bi;
5318
5319   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5320     {
5321       struct version_info *info;
5322
5323       info = ver_info (data, j);
5324       if (info->iv
5325           && !integer_zerop (info->iv->step)
5326           && !info->inv_id
5327           && !info->iv->have_use_for
5328           && !info->preserve_biv)
5329         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5330     }
5331 }
5332
5333 /* Frees data allocated by the optimization of a single loop.  */
5334
5335 static void
5336 free_loop_data (struct ivopts_data *data)
5337 {
5338   unsigned i, j;
5339   bitmap_iterator bi;
5340   tree obj;
5341
5342   if (data->niters)
5343     {
5344       pointer_map_destroy (data->niters);
5345       data->niters = NULL;
5346     }
5347
5348   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5349     {
5350       struct version_info *info;
5351
5352       info = ver_info (data, i);
5353       if (info->iv)
5354         free (info->iv);
5355       info->iv = NULL;
5356       info->has_nonlin_use = false;
5357       info->preserve_biv = false;
5358       info->inv_id = 0;
5359     }
5360   bitmap_clear (data->relevant);
5361   bitmap_clear (data->important_candidates);
5362
5363   for (i = 0; i < n_iv_uses (data); i++)
5364     {
5365       struct iv_use *use = iv_use (data, i);
5366
5367       free (use->iv);
5368       BITMAP_FREE (use->related_cands);
5369       for (j = 0; j < use->n_map_members; j++)
5370         if (use->cost_map[j].depends_on)
5371           BITMAP_FREE (use->cost_map[j].depends_on);
5372       free (use->cost_map);
5373       free (use);
5374     }
5375   VEC_truncate (iv_use_p, data->iv_uses, 0);
5376
5377   for (i = 0; i < n_iv_cands (data); i++)
5378     {
5379       struct iv_cand *cand = iv_cand (data, i);
5380
5381       if (cand->iv)
5382         free (cand->iv);
5383       if (cand->depends_on)
5384         BITMAP_FREE (cand->depends_on);
5385       free (cand);
5386     }
5387   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5388
5389   if (data->version_info_size < num_ssa_names)
5390     {
5391       data->version_info_size = 2 * num_ssa_names;
5392       free (data->version_info);
5393       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5394     }
5395
5396   data->max_inv_id = 0;
5397
5398   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5399     SET_DECL_RTL (obj, NULL_RTX);
5400
5401   VEC_truncate (tree, decl_rtl_to_reset, 0);
5402 }
5403
5404 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5405    loop tree.  */
5406
5407 static void
5408 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5409 {
5410   free_loop_data (data);
5411   free (data->version_info);
5412   BITMAP_FREE (data->relevant);
5413   BITMAP_FREE (data->important_candidates);
5414
5415   VEC_free (tree, heap, decl_rtl_to_reset);
5416   VEC_free (iv_use_p, heap, data->iv_uses);
5417   VEC_free (iv_cand_p, heap, data->iv_candidates);
5418 }
5419
5420 /* Optimizes the LOOP.  Returns true if anything changed.  */
5421
5422 static bool
5423 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5424 {
5425   bool changed = false;
5426   struct iv_ca *iv_ca;
5427   edge exit;
5428
5429   gcc_assert (!data->niters);
5430   data->current_loop = loop;
5431
5432   if (dump_file && (dump_flags & TDF_DETAILS))
5433     {
5434       fprintf (dump_file, "Processing loop %d\n", loop->num);
5435
5436       exit = single_dom_exit (loop);
5437       if (exit)
5438         {
5439           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5440                    exit->src->index, exit->dest->index);
5441           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5442           fprintf (dump_file, "\n");
5443         }
5444
5445       fprintf (dump_file, "\n");
5446     }
5447
5448   /* For each ssa name determines whether it behaves as an induction variable
5449      in some loop.  */
5450   if (!find_induction_variables (data))
5451     goto finish;
5452
5453   /* Finds interesting uses (item 1).  */
5454   find_interesting_uses (data);
5455   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5456     goto finish;
5457
5458   /* Finds candidates for the induction variables (item 2).  */
5459   find_iv_candidates (data);
5460
5461   /* Calculates the costs (item 3, part 1).  */
5462   determine_use_iv_costs (data);
5463   determine_iv_costs (data);
5464   determine_set_costs (data);
5465
5466   /* Find the optimal set of induction variables (item 3, part 2).  */
5467   iv_ca = find_optimal_iv_set (data);
5468   if (!iv_ca)
5469     goto finish;
5470   changed = true;
5471
5472   /* Create the new induction variables (item 4, part 1).  */
5473   create_new_ivs (data, iv_ca);
5474   iv_ca_free (&iv_ca);
5475
5476   /* Rewrite the uses (item 4, part 2).  */
5477   rewrite_uses (data);
5478
5479   /* Remove the ivs that are unused after rewriting.  */
5480   remove_unused_ivs (data);
5481
5482   /* We have changed the structure of induction variables; it might happen
5483      that definitions in the scev database refer to some of them that were
5484      eliminated.  */
5485   scev_reset ();
5486
5487 finish:
5488   free_loop_data (data);
5489
5490   return changed;
5491 }
5492
5493 /* Main entry point.  Optimizes induction variables in loops.  */
5494
5495 void
5496 tree_ssa_iv_optimize (void)
5497 {
5498   struct loop *loop;
5499   struct ivopts_data data;
5500   loop_iterator li;
5501
5502   tree_ssa_iv_optimize_init (&data);
5503
5504   /* Optimize the loops starting with the innermost ones.  */
5505   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5506     {
5507       if (dump_file && (dump_flags & TDF_DETAILS))
5508         flow_loop_dump (loop, dump_file, NULL, 1);
5509
5510       tree_ssa_iv_optimize_loop (&data, loop);
5511     }
5512
5513   tree_ssa_iv_optimize_finalize (&data);
5514 }