gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "rtl.h"
  70 #include "tm_p.h"
  71 #include "hard-reg-set.h"
  72 #include "basic-block.h"
  73 #include "output.h"
  74 #include "diagnostic.h"
  75 #include "tree-flow.h"
  76 #include "tree-dump.h"
  77 #include "timevar.h"
  78 #include "cfgloop.h"
  79 #include "varray.h"
  80 #include "expr.h"
  81 #include "tree-pass.h"
  82 #include "ggc.h"
  83 #include "insn-config.h"
  84 #include "recog.h"
  85 #include "pointer-set.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92 #include "tree-affine.h"
  93 #include "target.h"
  94
  95 /* The infinite cost.  */
  96 #define INFTY 10000000
  97
  98 /* The expected number of loop iterations.  TODO -- use profiling instead of
  99    this.  */
 100 #define AVG_LOOP_NITER(LOOP) 5
 101
 102
 103 /* Representation of the induction variable.  */
 104 struct iv
 105 {
 106   tree base;            /* Initial value of the iv.  */
 107   tree base_object;     /* A memory object to that the induction variable points.  */
 108   tree step;            /* Step of the iv (constant only).  */
 109   tree ssa_name;        /* The ssa name with the value.  */
 110   bool biv_p;           /* Is it a biv?  */
 111   bool have_use_for;    /* Do we already have a use for it?  */
 112   unsigned use_id;      /* The identifier in the use if it is the case.  */
 113 };
 114
 115 /* Per-ssa version information (induction variable descriptions, etc.).  */
 116 struct version_info
 117 {
 118   tree name;            /* The ssa name.  */
 119   struct iv *iv;        /* Induction variable description.  */
 120   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 121                            an expression that is not an induction variable.  */
 122   unsigned inv_id;      /* Id of an invariant.  */
 123   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 124 };
 125
 126 /* Types of uses.  */
 127 enum use_type
 128 {
 129   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 130   USE_ADDRESS,          /* Use in an address.  */
 131   USE_COMPARE           /* Use is a compare.  */
 132 };
 133
 134 /* Cost of a computation.  */
 135 typedef struct
 136 {
 137   unsigned cost;        /* The runtime cost.  */
 138   unsigned complexity;  /* The estimate of the complexity of the code for
 139                            the computation (in no concrete units --
 140                            complexity field should be larger for more
 141                            complex expressions and addressing modes).  */
 142 } comp_cost;
 143
 144 static const comp_cost zero_cost = {0, 0};
 145 static const comp_cost infinite_cost = {INFTY, INFTY};
 146
 147 /* The candidate - cost pair.  */
 148 struct cost_pair
 149 {
 150   struct iv_cand *cand; /* The candidate.  */
 151   comp_cost cost;       /* The cost.  */
 152   bitmap depends_on;    /* The list of invariants that have to be
 153                            preserved.  */
 154   tree value;           /* For final value elimination, the expression for
 155                            the final value of the iv.  For iv elimination,
 156                            the new bound to compare with.  */
 157 };
 158
 159 /* Use.  */
 160 struct iv_use
 161 {
 162   unsigned id;          /* The id of the use.  */
 163   enum use_type type;   /* Type of the use.  */
 164   struct iv *iv;        /* The induction variable it is based on.  */
 165   tree stmt;            /* Statement in that it occurs.  */
 166   tree *op_p;           /* The place where it occurs.  */
 167   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 168                            important ones.  */
 169
 170   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 171   struct cost_pair *cost_map;
 172                         /* The costs wrto the iv candidates.  */
 173
 174   struct iv_cand *selected;
 175                         /* The selected candidate.  */
 176 };
 177
 178 /* The position where the iv is computed.  */
 179 enum iv_position
 180 {
 181   IP_NORMAL,            /* At the end, just before the exit condition.  */
 182   IP_END,               /* At the end of the latch block.  */
 183   IP_ORIGINAL           /* The original biv.  */
 184 };
 185
 186 /* The induction variable candidate.  */
 187 struct iv_cand
 188 {
 189   unsigned id;          /* The number of the candidate.  */
 190   bool important;       /* Whether this is an "important" candidate, i.e. such
 191                            that it should be considered by all uses.  */
 192   enum iv_position pos; /* Where it is computed.  */
 193   tree incremented_at;  /* For original biv, the statement where it is
 194                            incremented.  */
 195   tree var_before;      /* The variable used for it before increment.  */
 196   tree var_after;       /* The variable used for it after increment.  */
 197   struct iv *iv;        /* The value of the candidate.  NULL for
 198                            "pseudocandidate" used to indicate the possibility
 199                            to replace the final value of an iv by direct
 200                            computation of the value.  */
 201   unsigned cost;        /* Cost of the candidate.  */
 202   bitmap depends_on;    /* The list of invariants that are used in step of the
 203                            biv.  */
 204 };
 205
 206 /* The data used by the induction variable optimizations.  */
 207
 208 typedef struct iv_use *iv_use_p;
 209 DEF_VEC_P(iv_use_p);
 210 DEF_VEC_ALLOC_P(iv_use_p,heap);
 211
 212 typedef struct iv_cand *iv_cand_p;
 213 DEF_VEC_P(iv_cand_p);
 214 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 215
 216 struct ivopts_data
 217 {
 218   /* The currently optimized loop.  */
 219   struct loop *current_loop;
 220
 221   /* Number of registers used in it.  */
 222   unsigned regs_used;
 223
 224   /* Numbers of iterations for all exits of the current loop.  */
 225   struct pointer_map_t *niters;
 226
 227   /* The size of version_info array allocated.  */
 228   unsigned version_info_size;
 229
 230   /* The array of information for the ssa names.  */
 231   struct version_info *version_info;
 232
 233   /* The bitmap of indices in version_info whose value was changed.  */
 234   bitmap relevant;
 235
 236   /* The maximum invariant id.  */
 237   unsigned max_inv_id;
 238
 239   /* The uses of induction variables.  */
 240   VEC(iv_use_p,heap) *iv_uses;
 241
 242   /* The candidates.  */
 243   VEC(iv_cand_p,heap) *iv_candidates;
 244
 245   /* A bitmap of important candidates.  */
 246   bitmap important_candidates;
 247
 248   /* Whether to consider just related and important candidates when replacing a
 249      use.  */
 250   bool consider_all_candidates;
 251 };
 252
 253 /* An assignment of iv candidates to uses.  */
 254
 255 struct iv_ca
 256 {
 257   /* The number of uses covered by the assignment.  */
 258   unsigned upto;
 259
 260   /* Number of uses that cannot be expressed by the candidates in the set.  */
 261   unsigned bad_uses;
 262
 263   /* Candidate assigned to a use, together with the related costs.  */
 264   struct cost_pair **cand_for_use;
 265
 266   /* Number of times each candidate is used.  */
 267   unsigned *n_cand_uses;
 268
 269   /* The candidates used.  */
 270   bitmap cands;
 271
 272   /* The number of candidates in the set.  */
 273   unsigned n_cands;
 274
 275   /* Total number of registers needed.  */
 276   unsigned n_regs;
 277
 278   /* Total cost of expressing uses.  */
 279   comp_cost cand_use_cost;
 280
 281   /* Total cost of candidates.  */
 282   unsigned cand_cost;
 283
 284   /* Number of times each invariant is used.  */
 285   unsigned *n_invariant_uses;
 286
 287   /* Total cost of the assignment.  */
 288   comp_cost cost;
 289 };
 290
 291 /* Difference of two iv candidate assignments.  */
 292
 293 struct iv_ca_delta
 294 {
 295   /* Changed use.  */
 296   struct iv_use *use;
 297
 298   /* An old assignment (for rollback purposes).  */
 299   struct cost_pair *old_cp;
 300
 301   /* A new assignment.  */
 302   struct cost_pair *new_cp;
 303
 304   /* Next change in the list.  */
 305   struct iv_ca_delta *next_change;
 306 };
 307
 308 /* Bound on number of candidates below that all candidates are considered.  */
 309
 310 #define CONSIDER_ALL_CANDIDATES_BOUND \
 311   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 312
 313 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 314    optimizing such a loop would help, and it would take ages).  */
 315
 316 #define MAX_CONSIDERED_USES \
 317   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 318
 319 /* If there are at most this number of ivs in the set, try removing unnecessary
 320    ivs from the set always.  */
 321
 322 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 323   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 324
 325 /* The list of trees for that the decl_rtl field must be reset is stored
 326    here.  */
 327
 328 static VEC(tree,heap) *decl_rtl_to_reset;
 329
 330 /* Number of uses recorded in DATA.  */
 331
 332 static inline unsigned
 333 n_iv_uses (struct ivopts_data *data)
 334 {
 335   return VEC_length (iv_use_p, data->iv_uses);
 336 }
 337
 338 /* Ith use recorded in DATA.  */
 339
 340 static inline struct iv_use *
 341 iv_use (struct ivopts_data *data, unsigned i)
 342 {
 343   return VEC_index (iv_use_p, data->iv_uses, i);
 344 }
 345
 346 /* Number of candidates recorded in DATA.  */
 347
 348 static inline unsigned
 349 n_iv_cands (struct ivopts_data *data)
 350 {
 351   return VEC_length (iv_cand_p, data->iv_candidates);
 352 }
 353
 354 /* Ith candidate recorded in DATA.  */
 355
 356 static inline struct iv_cand *
 357 iv_cand (struct ivopts_data *data, unsigned i)
 358 {
 359   return VEC_index (iv_cand_p, data->iv_candidates, i);
 360 }
 361
 362 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 363
 364 edge
 365 single_dom_exit (struct loop *loop)
 366 {
 367   edge exit = single_exit (loop);
 368
 369   if (!exit)
 370     return NULL;
 371
 372   if (!just_once_each_iteration_p (loop, exit->src))
 373     return NULL;
 374
 375   return exit;
 376 }
 377
 378 /* Dumps information about the induction variable IV to FILE.  */
 379
 380 extern void dump_iv (FILE *, struct iv *);
 381 void
 382 dump_iv (FILE *file, struct iv *iv)
 383 {
 384   if (iv->ssa_name)
 385     {
 386       fprintf (file, "ssa name ");
 387       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 388       fprintf (file, "\n");
 389     }
 390
 391   fprintf (file, "  type ");
 392   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 393   fprintf (file, "\n");
 394
 395   if (iv->step)
 396     {
 397       fprintf (file, "  base ");
 398       print_generic_expr (file, iv->base, TDF_SLIM);
 399       fprintf (file, "\n");
 400
 401       fprintf (file, "  step ");
 402       print_generic_expr (file, iv->step, TDF_SLIM);
 403       fprintf (file, "\n");
 404     }
 405   else
 406     {
 407       fprintf (file, "  invariant ");
 408       print_generic_expr (file, iv->base, TDF_SLIM);
 409       fprintf (file, "\n");
 410     }
 411
 412   if (iv->base_object)
 413     {
 414       fprintf (file, "  base object ");
 415       print_generic_expr (file, iv->base_object, TDF_SLIM);
 416       fprintf (file, "\n");
 417     }
 418
 419   if (iv->biv_p)
 420     fprintf (file, "  is a biv\n");
 421 }
 422
 423 /* Dumps information about the USE to FILE.  */
 424
 425 extern void dump_use (FILE *, struct iv_use *);
 426 void
 427 dump_use (FILE *file, struct iv_use *use)
 428 {
 429   fprintf (file, "use %d\n", use->id);
 430
 431   switch (use->type)
 432     {
 433     case USE_NONLINEAR_EXPR:
 434       fprintf (file, "  generic\n");
 435       break;
 436
 437     case USE_ADDRESS:
 438       fprintf (file, "  address\n");
 439       break;
 440
 441     case USE_COMPARE:
 442       fprintf (file, "  compare\n");
 443       break;
 444
 445     default:
 446       gcc_unreachable ();
 447     }
 448
 449   fprintf (file, "  in statement ");
 450   print_generic_expr (file, use->stmt, TDF_SLIM);
 451   fprintf (file, "\n");
 452
 453   fprintf (file, "  at position ");
 454   if (use->op_p)
 455     print_generic_expr (file, *use->op_p, TDF_SLIM);
 456   fprintf (file, "\n");
 457
 458   dump_iv (file, use->iv);
 459
 460   if (use->related_cands)
 461     {
 462       fprintf (file, "  related candidates ");
 463       dump_bitmap (file, use->related_cands);
 464     }
 465 }
 466
 467 /* Dumps information about the uses to FILE.  */
 468
 469 extern void dump_uses (FILE *, struct ivopts_data *);
 470 void
 471 dump_uses (FILE *file, struct ivopts_data *data)
 472 {
 473   unsigned i;
 474   struct iv_use *use;
 475
 476   for (i = 0; i < n_iv_uses (data); i++)
 477     {
 478       use = iv_use (data, i);
 479
 480       dump_use (file, use);
 481       fprintf (file, "\n");
 482     }
 483 }
 484
 485 /* Dumps information about induction variable candidate CAND to FILE.  */
 486
 487 extern void dump_cand (FILE *, struct iv_cand *);
 488 void
 489 dump_cand (FILE *file, struct iv_cand *cand)
 490 {
 491   struct iv *iv = cand->iv;
 492
 493   fprintf (file, "candidate %d%s\n",
 494            cand->id, cand->important ? " (important)" : "");
 495
 496   if (cand->depends_on)
 497     {
 498       fprintf (file, "  depends on ");
 499       dump_bitmap (file, cand->depends_on);
 500     }
 501
 502   if (!iv)
 503     {
 504       fprintf (file, "  final value replacement\n");
 505       return;
 506     }
 507
 508   switch (cand->pos)
 509     {
 510     case IP_NORMAL:
 511       fprintf (file, "  incremented before exit test\n");
 512       break;
 513
 514     case IP_END:
 515       fprintf (file, "  incremented at end\n");
 516       break;
 517
 518     case IP_ORIGINAL:
 519       fprintf (file, "  original biv\n");
 520       break;
 521     }
 522
 523   dump_iv (file, iv);
 524 }
 525
 526 /* Returns the info for ssa version VER.  */
 527
 528 static inline struct version_info *
 529 ver_info (struct ivopts_data *data, unsigned ver)
 530 {
 531   return data->version_info + ver;
 532 }
 533
 534 /* Returns the info for ssa name NAME.  */
 535
 536 static inline struct version_info *
 537 name_info (struct ivopts_data *data, tree name)
 538 {
 539   return ver_info (data, SSA_NAME_VERSION (name));
 540 }
 541
 542 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 543    emitted in LOOP.  */
 544
 545 static bool
 546 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 547 {
 548   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 549
 550   gcc_assert (bb);
 551
 552   if (sbb == loop->latch)
 553     return true;
 554
 555   if (sbb != bb)
 556     return false;
 557
 558   return stmt == last_stmt (bb);
 559 }
 560
 561 /* Returns true if STMT if after the place where the original induction
 562    variable CAND is incremented.  */
 563
 564 static bool
 565 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 566 {
 567   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 568   basic_block stmt_bb = bb_for_stmt (stmt);
 569   block_stmt_iterator bsi;
 570
 571   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 572     return false;
 573
 574   if (stmt_bb != cand_bb)
 575     return true;
 576
 577   /* Scan the block from the end, since the original ivs are usually
 578      incremented at the end of the loop body.  */
 579   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 580     {
 581       if (bsi_stmt (bsi) == cand->incremented_at)
 582         return false;
 583       if (bsi_stmt (bsi) == stmt)
 584         return true;
 585     }
 586 }
 587
 588 /* Returns true if STMT if after the place where the induction variable
 589    CAND is incremented in LOOP.  */
 590
 591 static bool
 592 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 593 {
 594   switch (cand->pos)
 595     {
 596     case IP_END:
 597       return false;
 598
 599     case IP_NORMAL:
 600       return stmt_after_ip_normal_pos (loop, stmt);
 601
 602     case IP_ORIGINAL:
 603       return stmt_after_ip_original_pos (cand, stmt);
 604
 605     default:
 606       gcc_unreachable ();
 607     }
 608 }
 609
 610 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 611
 612 static bool
 613 abnormal_ssa_name_p (tree exp)
 614 {
 615   if (!exp)
 616     return false;
 617
 618   if (TREE_CODE (exp) != SSA_NAME)
 619     return false;
 620
 621   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 622 }
 623
 624 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 625    abnormal phi node.  Callback for for_each_index.  */
 626
 627 static bool
 628 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 629                                   void *data ATTRIBUTE_UNUSED)
 630 {
 631   if (TREE_CODE (base) == ARRAY_REF)
 632     {
 633       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 634         return false;
 635       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 636         return false;
 637     }
 638
 639   return !abnormal_ssa_name_p (*index);
 640 }
 641
 642 /* Returns true if EXPR contains a ssa name that occurs in an
 643    abnormal phi node.  */
 644
 645 bool
 646 contains_abnormal_ssa_name_p (tree expr)
 647 {
 648   enum tree_code code;
 649   enum tree_code_class codeclass;
 650
 651   if (!expr)
 652     return false;
 653
 654   code = TREE_CODE (expr);
 655   codeclass = TREE_CODE_CLASS (code);
 656
 657   if (code == SSA_NAME)
 658     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 659
 660   if (code == INTEGER_CST
 661       || is_gimple_min_invariant (expr))
 662     return false;
 663
 664   if (code == ADDR_EXPR)
 665     return !for_each_index (&TREE_OPERAND (expr, 0),
 666                             idx_contains_abnormal_ssa_name_p,
 667                             NULL);
 668
 669   switch (codeclass)
 670     {
 671     case tcc_binary:
 672     case tcc_comparison:
 673       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 674         return true;
 675
 676       /* Fallthru.  */
 677     case tcc_unary:
 678       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 679         return true;
 680
 681       break;
 682
 683     default:
 684       gcc_unreachable ();
 685     }
 686
 687   return false;
 688 }
 689
 690 /*  Returns tree describing number of iterations determined from
 691     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 692
 693 static tree
 694 niter_for_exit (struct ivopts_data *data, edge exit)
 695 {
 696   struct tree_niter_desc desc;
 697   tree niter;
 698   void **slot;
 699
 700   if (!data->niters)
 701     {
 702       data->niters = pointer_map_create ();
 703       slot = NULL;
 704     }
 705   else
 706     slot = pointer_map_contains (data->niters, exit);
 707
 708   if (!slot)
 709     {
 710       /* Try to determine number of iterations.  We must know it
 711          unconditionally (i.e., without possibility of # of iterations
 712          being zero).  Also, we cannot safely work with ssa names that
 713          appear in phi nodes on abnormal edges, so that we do not create
 714          overlapping life ranges for them (PR 27283).  */
 715       if (number_of_iterations_exit (data->current_loop,
 716                                      exit, &desc, true)
 717           && integer_zerop (desc.may_be_zero)
 718           && !contains_abnormal_ssa_name_p (desc.niter))
 719         niter = desc.niter;
 720       else
 721         niter = NULL_TREE;
 722
 723       *pointer_map_insert (data->niters, exit) = niter;
 724     }
 725   else
 726     niter = (tree) *slot;
 727
 728   return niter;
 729 }
 730
 731 /* Returns tree describing number of iterations determined from
 732    single dominating exit of DATA->current_loop, or NULL if something
 733    goes wrong.  */
 734
 735 static tree
 736 niter_for_single_dom_exit (struct ivopts_data *data)
 737 {
 738   edge exit = single_dom_exit (data->current_loop);
 739
 740   if (!exit)
 741     return NULL;
 742
 743   return niter_for_exit (data, exit);
 744 }
 745
 746 /* Initializes data structures used by the iv optimization pass, stored
 747    in DATA.  */
 748
 749 static void
 750 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 751 {
 752   data->version_info_size = 2 * num_ssa_names;
 753   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 754   data->relevant = BITMAP_ALLOC (NULL);
 755   data->important_candidates = BITMAP_ALLOC (NULL);
 756   data->max_inv_id = 0;
 757   data->niters = NULL;
 758   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 759   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 760   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 761 }
 762
 763 /* Returns a memory object to that EXPR points.  In case we are able to
 764    determine that it does not point to any such object, NULL is returned.  */
 765
 766 static tree
 767 determine_base_object (tree expr)
 768 {
 769   enum tree_code code = TREE_CODE (expr);
 770   tree base, obj;
 771
 772   /* If this is a pointer casted to any type, we need to determine
 773      the base object for the pointer; so handle conversions before
 774      throwing away non-pointer expressions.  */
 775   if (TREE_CODE (expr) == NOP_EXPR
 776       || TREE_CODE (expr) == CONVERT_EXPR)
 777     return determine_base_object (TREE_OPERAND (expr, 0));
 778
 779   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 780     return NULL_TREE;
 781
 782   switch (code)
 783     {
 784     case INTEGER_CST:
 785       return NULL_TREE;
 786
 787     case ADDR_EXPR:
 788       obj = TREE_OPERAND (expr, 0);
 789       base = get_base_address (obj);
 790
 791       if (!base)
 792         return expr;
 793
 794       if (TREE_CODE (base) == INDIRECT_REF)
 795         return determine_base_object (TREE_OPERAND (base, 0));
 796
 797       return fold_convert (ptr_type_node,
 798                            build_fold_addr_expr (base));
 799
 800     case POINTER_PLUS_EXPR:
 801       return determine_base_object (TREE_OPERAND (expr, 0));
 802
 803     case PLUS_EXPR:
 804     case MINUS_EXPR:
 805       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 806       gcc_unreachable ();
 807
 808     default:
 809       return fold_convert (ptr_type_node, expr);
 810     }
 811 }
 812
 813 /* Allocates an induction variable with given initial value BASE and step STEP
 814    for loop LOOP.  */
 815
 816 static struct iv *
 817 alloc_iv (tree base, tree step)
 818 {
 819   struct iv *iv = XCNEW (struct iv);
 820   gcc_assert (step != NULL_TREE);
 821
 822   iv->base = base;
 823   iv->base_object = determine_base_object (base);
 824   iv->step = step;
 825   iv->biv_p = false;
 826   iv->have_use_for = false;
 827   iv->use_id = 0;
 828   iv->ssa_name = NULL_TREE;
 829
 830   return iv;
 831 }
 832
 833 /* Sets STEP and BASE for induction variable IV.  */
 834
 835 static void
 836 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 837 {
 838   struct version_info *info = name_info (data, iv);
 839
 840   gcc_assert (!info->iv);
 841
 842   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 843   info->iv = alloc_iv (base, step);
 844   info->iv->ssa_name = iv;
 845 }
 846
 847 /* Finds induction variable declaration for VAR.  */
 848
 849 static struct iv *
 850 get_iv (struct ivopts_data *data, tree var)
 851 {
 852   basic_block bb;
 853   tree type = TREE_TYPE (var);
 854
 855   if (!POINTER_TYPE_P (type)
 856       && !INTEGRAL_TYPE_P (type))
 857     return NULL;
 858
 859   if (!name_info (data, var)->iv)
 860     {
 861       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 862
 863       if (!bb
 864           || !flow_bb_inside_loop_p (data->current_loop, bb))
 865         set_iv (data, var, var, build_int_cst (type, 0));
 866     }
 867
 868   return name_info (data, var)->iv;
 869 }
 870
 871 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 872    not define a simple affine biv with nonzero step.  */
 873
 874 static tree
 875 determine_biv_step (tree phi)
 876 {
 877   struct loop *loop = bb_for_stmt (phi)->loop_father;
 878   tree name = PHI_RESULT (phi);
 879   affine_iv iv;
 880
 881   if (!is_gimple_reg (name))
 882     return NULL_TREE;
 883
 884   if (!simple_iv (loop, phi, name, &iv, true))
 885     return NULL_TREE;
 886
 887   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 888 }
 889
 890 /* Finds basic ivs.  */
 891
 892 static bool
 893 find_bivs (struct ivopts_data *data)
 894 {
 895   tree phi, step, type, base;
 896   bool found = false;
 897   struct loop *loop = data->current_loop;
 898
 899   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 900     {
 901       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 902         continue;
 903
 904       step = determine_biv_step (phi);
 905       if (!step)
 906         continue;
 907
 908       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 909       base = expand_simple_operations (base);
 910       if (contains_abnormal_ssa_name_p (base)
 911           || contains_abnormal_ssa_name_p (step))
 912         continue;
 913
 914       type = TREE_TYPE (PHI_RESULT (phi));
 915       base = fold_convert (type, base);
 916       if (step)
 917         step = fold_convert (type, step);
 918
 919       set_iv (data, PHI_RESULT (phi), base, step);
 920       found = true;
 921     }
 922
 923   return found;
 924 }
 925
 926 /* Marks basic ivs.  */
 927
 928 static void
 929 mark_bivs (struct ivopts_data *data)
 930 {
 931   tree phi, var;
 932   struct iv *iv, *incr_iv;
 933   struct loop *loop = data->current_loop;
 934   basic_block incr_bb;
 935
 936   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 937     {
 938       iv = get_iv (data, PHI_RESULT (phi));
 939       if (!iv)
 940         continue;
 941
 942       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 943       incr_iv = get_iv (data, var);
 944       if (!incr_iv)
 945         continue;
 946
 947       /* If the increment is in the subloop, ignore it.  */
 948       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 949       if (incr_bb->loop_father != data->current_loop
 950           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 951         continue;
 952
 953       iv->biv_p = true;
 954       incr_iv->biv_p = true;
 955     }
 956 }
 957
 958 /* Checks whether STMT defines a linear induction variable and stores its
 959    parameters to IV.  */
 960
 961 static bool
 962 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 963 {
 964   tree lhs;
 965   struct loop *loop = data->current_loop;
 966
 967   iv->base = NULL_TREE;
 968   iv->step = NULL_TREE;
 969
 970   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 971     return false;
 972
 973   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 974   if (TREE_CODE (lhs) != SSA_NAME)
 975     return false;
 976
 977   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
 978     return false;
 979   iv->base = expand_simple_operations (iv->base);
 980
 981   if (contains_abnormal_ssa_name_p (iv->base)
 982       || contains_abnormal_ssa_name_p (iv->step))
 983     return false;
 984
 985   return true;
 986 }
 987
 988 /* Finds general ivs in statement STMT.  */
 989
 990 static void
 991 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
 992 {
 993   affine_iv iv;
 994
 995   if (!find_givs_in_stmt_scev (data, stmt, &iv))
 996     return;
 997
 998   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
 999 }
1000
1001 /* Finds general ivs in basic block BB.  */
1002
1003 static void
1004 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1005 {
1006   block_stmt_iterator bsi;
1007
1008   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1009     find_givs_in_stmt (data, bsi_stmt (bsi));
1010 }
1011
1012 /* Finds general ivs.  */
1013
1014 static void
1015 find_givs (struct ivopts_data *data)
1016 {
1017   struct loop *loop = data->current_loop;
1018   basic_block *body = get_loop_body_in_dom_order (loop);
1019   unsigned i;
1020
1021   for (i = 0; i < loop->num_nodes; i++)
1022     find_givs_in_bb (data, body[i]);
1023   free (body);
1024 }
1025
1026 /* For each ssa name defined in LOOP determines whether it is an induction
1027    variable and if so, its initial value and step.  */
1028
1029 static bool
1030 find_induction_variables (struct ivopts_data *data)
1031 {
1032   unsigned i;
1033   bitmap_iterator bi;
1034
1035   if (!find_bivs (data))
1036     return false;
1037
1038   find_givs (data);
1039   mark_bivs (data);
1040
1041   if (dump_file && (dump_flags & TDF_DETAILS))
1042     {
1043       tree niter = niter_for_single_dom_exit (data);
1044
1045       if (niter)
1046         {
1047           fprintf (dump_file, "  number of iterations ");
1048           print_generic_expr (dump_file, niter, TDF_SLIM);
1049           fprintf (dump_file, "\n\n");
1050         };
1051
1052       fprintf (dump_file, "Induction variables:\n\n");
1053
1054       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1055         {
1056           if (ver_info (data, i)->iv)
1057             dump_iv (dump_file, ver_info (data, i)->iv);
1058         }
1059     }
1060
1061   return true;
1062 }
1063
1064 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1065
1066 static struct iv_use *
1067 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1068             tree stmt, enum use_type use_type)
1069 {
1070   struct iv_use *use = XCNEW (struct iv_use);
1071
1072   use->id = n_iv_uses (data);
1073   use->type = use_type;
1074   use->iv = iv;
1075   use->stmt = stmt;
1076   use->op_p = use_p;
1077   use->related_cands = BITMAP_ALLOC (NULL);
1078
1079   /* To avoid showing ssa name in the dumps, if it was not reset by the
1080      caller.  */
1081   iv->ssa_name = NULL_TREE;
1082
1083   if (dump_file && (dump_flags & TDF_DETAILS))
1084     dump_use (dump_file, use);
1085
1086   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1087
1088   return use;
1089 }
1090
1091 /* Checks whether OP is a loop-level invariant and if so, records it.
1092    NONLINEAR_USE is true if the invariant is used in a way we do not
1093    handle specially.  */
1094
1095 static void
1096 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1097 {
1098   basic_block bb;
1099   struct version_info *info;
1100
1101   if (TREE_CODE (op) != SSA_NAME
1102       || !is_gimple_reg (op))
1103     return;
1104
1105   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1106   if (bb
1107       && flow_bb_inside_loop_p (data->current_loop, bb))
1108     return;
1109
1110   info = name_info (data, op);
1111   info->name = op;
1112   info->has_nonlin_use |= nonlinear_use;
1113   if (!info->inv_id)
1114     info->inv_id = ++data->max_inv_id;
1115   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1116 }
1117
1118 /* Checks whether the use OP is interesting and if so, records it.  */
1119
1120 static struct iv_use *
1121 find_interesting_uses_op (struct ivopts_data *data, tree op)
1122 {
1123   struct iv *iv;
1124   struct iv *civ;
1125   tree stmt;
1126   struct iv_use *use;
1127
1128   if (TREE_CODE (op) != SSA_NAME)
1129     return NULL;
1130
1131   iv = get_iv (data, op);
1132   if (!iv)
1133     return NULL;
1134
1135   if (iv->have_use_for)
1136     {
1137       use = iv_use (data, iv->use_id);
1138
1139       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1140       return use;
1141     }
1142
1143   if (integer_zerop (iv->step))
1144     {
1145       record_invariant (data, op, true);
1146       return NULL;
1147     }
1148   iv->have_use_for = true;
1149
1150   civ = XNEW (struct iv);
1151   *civ = *iv;
1152
1153   stmt = SSA_NAME_DEF_STMT (op);
1154   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1155               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1156
1157   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1158   iv->use_id = use->id;
1159
1160   return use;
1161 }
1162
1163 /* Given a condition *COND_P, checks whether it is a compare of an induction
1164    variable and an invariant.  If this is the case, CONTROL_VAR is set
1165    to location of the iv, BOUND to the location of the invariant,
1166    IV_VAR and IV_BOUND are set to the corresponding induction variable
1167    descriptions, and true is returned.  If this is not the case,
1168    CONTROL_VAR and BOUND are set to the arguments of the condition and
1169    false is returned.  */
1170
1171 static bool
1172 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1173                        tree **control_var, tree **bound,
1174                        struct iv **iv_var, struct iv **iv_bound)
1175 {
1176   /* The nodes returned when COND has just one operand.  Note that you should
1177      not modify anything in BOUND or IV_BOUND because of this.  */
1178   static struct iv const_iv;
1179   static tree zero;
1180   tree cond = *cond_p;
1181   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1182   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1183   bool ret = false;
1184
1185   zero = integer_zero_node;
1186   const_iv.step = integer_zero_node;
1187
1188   if (TREE_CODE (cond) == SSA_NAME)
1189     {
1190       op0 = cond_p;
1191       iv0 = get_iv (data, cond);
1192       ret = (iv0 && !integer_zerop (iv0->step));
1193       goto end;
1194     }
1195
1196   if (!COMPARISON_CLASS_P (cond))
1197     {
1198       op0 = cond_p;
1199       goto end;
1200     }
1201
1202   op0 = &TREE_OPERAND (cond, 0);
1203   op1 = &TREE_OPERAND (cond, 1);
1204   if (TREE_CODE (*op0) == SSA_NAME)
1205     iv0 = get_iv (data, *op0);
1206   if (TREE_CODE (*op1) == SSA_NAME)
1207     iv1 = get_iv (data, *op1);
1208
1209   /* Exactly one of the compared values must be an iv, and the other one must
1210      be an invariant.  */
1211   if (!iv0 || !iv1)
1212     goto end;
1213
1214   if (integer_zerop (iv0->step))
1215     {
1216       /* Control variable may be on the other side.  */
1217       tmp_op = op0; op0 = op1; op1 = tmp_op;
1218       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1219     }
1220   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1221
1222 end:
1223   if (control_var)
1224     *control_var = op0;;
1225   if (iv_var)
1226     *iv_var = iv0;;
1227   if (bound)
1228     *bound = op1;
1229   if (iv_bound)
1230     *iv_bound = iv1;
1231
1232   return ret;
1233 }
1234
1235 /* Checks whether the condition *COND_P in STMT is interesting
1236    and if so, records it.  */
1237
1238 static void
1239 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1240 {
1241   tree *var_p, *bound_p;
1242   struct iv *var_iv, *civ;
1243
1244   if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1245     {
1246       find_interesting_uses_op (data, *var_p);
1247       find_interesting_uses_op (data, *bound_p);
1248       return;
1249     }
1250
1251   civ = XNEW (struct iv);
1252   *civ = *var_iv;
1253   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1254 }
1255
1256 /* Returns true if expression EXPR is obviously invariant in LOOP,
1257    i.e. if all its operands are defined outside of the LOOP.  */
1258
1259 bool
1260 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1261 {
1262   basic_block def_bb;
1263   unsigned i, len;
1264
1265   if (is_gimple_min_invariant (expr))
1266     return true;
1267
1268   if (TREE_CODE (expr) == SSA_NAME)
1269     {
1270       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1271       if (def_bb
1272           && flow_bb_inside_loop_p (loop, def_bb))
1273         return false;
1274
1275       return true;
1276     }
1277
1278   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1279     return false;
1280
1281   len = TREE_OPERAND_LENGTH (expr);
1282   for (i = 0; i < len; i++)
1283     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1284       return false;
1285
1286   return true;
1287 }
1288
1289 /* Cumulates the steps of indices into DATA and replaces their values with the
1290    initial ones.  Returns false when the value of the index cannot be determined.
1291    Callback for for_each_index.  */
1292
1293 struct ifs_ivopts_data
1294 {
1295   struct ivopts_data *ivopts_data;
1296   tree stmt;
1297   tree step;
1298 };
1299
1300 static bool
1301 idx_find_step (tree base, tree *idx, void *data)
1302 {
1303   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1304   struct iv *iv;
1305   tree step, iv_base, iv_step, lbound, off;
1306   struct loop *loop = dta->ivopts_data->current_loop;
1307
1308   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1309       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1310     return false;
1311
1312   /* If base is a component ref, require that the offset of the reference
1313      be invariant.  */
1314   if (TREE_CODE (base) == COMPONENT_REF)
1315     {
1316       off = component_ref_field_offset (base);
1317       return expr_invariant_in_loop_p (loop, off);
1318     }
1319
1320   /* If base is array, first check whether we will be able to move the
1321      reference out of the loop (in order to take its address in strength
1322      reduction).  In order for this to work we need both lower bound
1323      and step to be loop invariants.  */
1324   if (TREE_CODE (base) == ARRAY_REF)
1325     {
1326       step = array_ref_element_size (base);
1327       lbound = array_ref_low_bound (base);
1328
1329       if (!expr_invariant_in_loop_p (loop, step)
1330           || !expr_invariant_in_loop_p (loop, lbound))
1331         return false;
1332     }
1333
1334   if (TREE_CODE (*idx) != SSA_NAME)
1335     return true;
1336
1337   iv = get_iv (dta->ivopts_data, *idx);
1338   if (!iv)
1339     return false;
1340
1341   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1342           *&x[0], which is not folded and does not trigger the
1343           ARRAY_REF path below.  */
1344   *idx = iv->base;
1345
1346   if (integer_zerop (iv->step))
1347     return true;
1348
1349   if (TREE_CODE (base) == ARRAY_REF)
1350     {
1351       step = array_ref_element_size (base);
1352
1353       /* We only handle addresses whose step is an integer constant.  */
1354       if (TREE_CODE (step) != INTEGER_CST)
1355         return false;
1356     }
1357   else
1358     /* The step for pointer arithmetics already is 1 byte.  */
1359     step = build_int_cst (sizetype, 1);
1360
1361   iv_base = iv->base;
1362   iv_step = iv->step;
1363   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1364                             sizetype, &iv_base, &iv_step, dta->stmt,
1365                             false))
1366     {
1367       /* The index might wrap.  */
1368       return false;
1369     }
1370
1371   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1372   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1373
1374   return true;
1375 }
1376
1377 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1378    object is passed to it in DATA.  */
1379
1380 static bool
1381 idx_record_use (tree base, tree *idx,
1382                 void *vdata)
1383 {
1384   struct ivopts_data *data = (struct ivopts_data *) vdata;
1385   find_interesting_uses_op (data, *idx);
1386   if (TREE_CODE (base) == ARRAY_REF)
1387     {
1388       find_interesting_uses_op (data, array_ref_element_size (base));
1389       find_interesting_uses_op (data, array_ref_low_bound (base));
1390     }
1391   return true;
1392 }
1393
1394 /* Returns true if memory reference REF may be unaligned.  */
1395
1396 static bool
1397 may_be_unaligned_p (tree ref)
1398 {
1399   tree base;
1400   tree base_type;
1401   HOST_WIDE_INT bitsize;
1402   HOST_WIDE_INT bitpos;
1403   tree toffset;
1404   enum machine_mode mode;
1405   int unsignedp, volatilep;
1406   unsigned base_align;
1407
1408   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1409      thus they are not misaligned.  */
1410   if (TREE_CODE (ref) == TARGET_MEM_REF)
1411     return false;
1412
1413   /* The test below is basically copy of what expr.c:normal_inner_ref
1414      does to check whether the object must be loaded by parts when
1415      STRICT_ALIGNMENT is true.  */
1416   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1417                               &unsignedp, &volatilep, true);
1418   base_type = TREE_TYPE (base);
1419   base_align = TYPE_ALIGN (base_type);
1420
1421   if (mode != BLKmode
1422       && (base_align < GET_MODE_ALIGNMENT (mode)
1423           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1424           || bitpos % BITS_PER_UNIT != 0))
1425     return true;
1426
1427   return false;
1428 }
1429
1430 /* Return true if EXPR may be non-addressable.   */
1431
1432 static bool
1433 may_be_nonaddressable_p (tree expr)
1434 {
1435   switch (TREE_CODE (expr))
1436     {
1437     case TARGET_MEM_REF:
1438       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1439          target, thus they are always addressable.  */
1440       return false;
1441
1442     case COMPONENT_REF:
1443       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1444              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1445
1446     case VIEW_CONVERT_EXPR:
1447       /* This kind of view-conversions may wrap non-addressable objects
1448          and make them look addressable.  After some processing the
1449          non-addressability may be uncovered again, causing ADDR_EXPRs
1450          of inappropriate objects to be built.  */
1451       if (AGGREGATE_TYPE_P (TREE_TYPE (expr))
1452           && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0))))
1453         return true;
1454
1455       /* ... fall through ... */
1456
1457     case ARRAY_REF:
1458     case ARRAY_RANGE_REF:
1459       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1460
1461     case CONVERT_EXPR:
1462     case NON_LVALUE_EXPR:
1463     case NOP_EXPR:
1464       return true;
1465
1466     default:
1467       break;
1468     }
1469
1470   return false;
1471 }
1472
1473 /* Finds addresses in *OP_P inside STMT.  */
1474
1475 static void
1476 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1477 {
1478   tree base = *op_p, step = build_int_cst (sizetype, 0);
1479   struct iv *civ;
1480   struct ifs_ivopts_data ifs_ivopts_data;
1481
1482   /* Do not play with volatile memory references.  A bit too conservative,
1483      perhaps, but safe.  */
1484   if (stmt_ann (stmt)->has_volatile_ops)
1485     goto fail;
1486
1487   /* Ignore bitfields for now.  Not really something terribly complicated
1488      to handle.  TODO.  */
1489   if (TREE_CODE (base) == BIT_FIELD_REF)
1490     goto fail;
1491
1492   base = unshare_expr (base);
1493
1494   if (TREE_CODE (base) == TARGET_MEM_REF)
1495     {
1496       tree type = build_pointer_type (TREE_TYPE (base));
1497       tree astep;
1498
1499       if (TMR_BASE (base)
1500           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1501         {
1502           civ = get_iv (data, TMR_BASE (base));
1503           if (!civ)
1504             goto fail;
1505
1506           TMR_BASE (base) = civ->base;
1507           step = civ->step;
1508         }
1509       if (TMR_INDEX (base)
1510           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1511         {
1512           civ = get_iv (data, TMR_INDEX (base));
1513           if (!civ)
1514             goto fail;
1515
1516           TMR_INDEX (base) = civ->base;
1517           astep = civ->step;
1518
1519           if (astep)
1520             {
1521               if (TMR_STEP (base))
1522                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1523
1524               step = fold_build2 (PLUS_EXPR, type, step, astep);
1525             }
1526         }
1527
1528       if (integer_zerop (step))
1529         goto fail;
1530       base = tree_mem_ref_addr (type, base);
1531     }
1532   else
1533     {
1534       ifs_ivopts_data.ivopts_data = data;
1535       ifs_ivopts_data.stmt = stmt;
1536       ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1537       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1538           || integer_zerop (ifs_ivopts_data.step))
1539         goto fail;
1540       step = ifs_ivopts_data.step;
1541
1542       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1543       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1544
1545       /* Check that the base expression is addressable.  This needs
1546          to be done after substituting bases of IVs into it.  */
1547       if (may_be_nonaddressable_p (base))
1548         goto fail;
1549
1550       /* Moreover, on strict alignment platforms, check that it is
1551          sufficiently aligned.  */
1552       if (STRICT_ALIGNMENT && may_be_unaligned_p (base))
1553         goto fail;
1554
1555       base = build_fold_addr_expr (base);
1556
1557       /* Substituting bases of IVs into the base expression might
1558          have caused folding opportunities.  */
1559       if (TREE_CODE (base) == ADDR_EXPR)
1560         {
1561           tree *ref = &TREE_OPERAND (base, 0);
1562           while (handled_component_p (*ref))
1563             ref = &TREE_OPERAND (*ref, 0);
1564           if (TREE_CODE (*ref) == INDIRECT_REF)
1565             *ref = fold_indirect_ref (*ref);
1566         }
1567     }
1568
1569   civ = alloc_iv (base, step);
1570   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1571   return;
1572
1573 fail:
1574   for_each_index (op_p, idx_record_use, data);
1575 }
1576
1577 /* Finds and records invariants used in STMT.  */
1578
1579 static void
1580 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1581 {
1582   ssa_op_iter iter;
1583   use_operand_p use_p;
1584   tree op;
1585
1586   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1587     {
1588       op = USE_FROM_PTR (use_p);
1589       record_invariant (data, op, false);
1590     }
1591 }
1592
1593 /* Finds interesting uses of induction variables in the statement STMT.  */
1594
1595 static void
1596 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1597 {
1598   struct iv *iv;
1599   tree op, lhs, rhs;
1600   ssa_op_iter iter;
1601   use_operand_p use_p;
1602
1603   find_invariants_stmt (data, stmt);
1604
1605   if (TREE_CODE (stmt) == COND_EXPR)
1606     {
1607       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1608       return;
1609     }
1610
1611   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1612     {
1613       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1614       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1615
1616       if (TREE_CODE (lhs) == SSA_NAME)
1617         {
1618           /* If the statement defines an induction variable, the uses are not
1619              interesting by themselves.  */
1620
1621           iv = get_iv (data, lhs);
1622
1623           if (iv && !integer_zerop (iv->step))
1624             return;
1625         }
1626
1627       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1628         {
1629         case tcc_comparison:
1630           find_interesting_uses_cond (data, stmt,
1631                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1632           return;
1633
1634         case tcc_reference:
1635           find_interesting_uses_address (data, stmt,
1636                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1637           if (REFERENCE_CLASS_P (lhs))
1638             find_interesting_uses_address (data, stmt,
1639                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1640           return;
1641
1642         default: ;
1643         }
1644
1645       if (REFERENCE_CLASS_P (lhs)
1646           && is_gimple_val (rhs))
1647         {
1648           find_interesting_uses_address (data, stmt,
1649                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1650           find_interesting_uses_op (data, rhs);
1651           return;
1652         }
1653
1654       /* TODO -- we should also handle address uses of type
1655
1656          memory = call (whatever);
1657
1658          and
1659
1660          call (memory).  */
1661     }
1662
1663   if (TREE_CODE (stmt) == PHI_NODE
1664       && bb_for_stmt (stmt) == data->current_loop->header)
1665     {
1666       lhs = PHI_RESULT (stmt);
1667       iv = get_iv (data, lhs);
1668
1669       if (iv && !integer_zerop (iv->step))
1670         return;
1671     }
1672
1673   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1674     {
1675       op = USE_FROM_PTR (use_p);
1676
1677       if (TREE_CODE (op) != SSA_NAME)
1678         continue;
1679
1680       iv = get_iv (data, op);
1681       if (!iv)
1682         continue;
1683
1684       find_interesting_uses_op (data, op);
1685     }
1686 }
1687
1688 /* Finds interesting uses of induction variables outside of loops
1689    on loop exit edge EXIT.  */
1690
1691 static void
1692 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1693 {
1694   tree phi, def;
1695
1696   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1697     {
1698       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1699       if (is_gimple_reg (def))
1700         find_interesting_uses_op (data, def);
1701     }
1702 }
1703
1704 /* Finds uses of the induction variables that are interesting.  */
1705
1706 static void
1707 find_interesting_uses (struct ivopts_data *data)
1708 {
1709   basic_block bb;
1710   block_stmt_iterator bsi;
1711   tree phi;
1712   basic_block *body = get_loop_body (data->current_loop);
1713   unsigned i;
1714   struct version_info *info;
1715   edge e;
1716
1717   if (dump_file && (dump_flags & TDF_DETAILS))
1718     fprintf (dump_file, "Uses:\n\n");
1719
1720   for (i = 0; i < data->current_loop->num_nodes; i++)
1721     {
1722       edge_iterator ei;
1723       bb = body[i];
1724
1725       FOR_EACH_EDGE (e, ei, bb->succs)
1726         if (e->dest != EXIT_BLOCK_PTR
1727             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1728           find_interesting_uses_outside (data, e);
1729
1730       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1731         find_interesting_uses_stmt (data, phi);
1732       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1733         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1734     }
1735
1736   if (dump_file && (dump_flags & TDF_DETAILS))
1737     {
1738       bitmap_iterator bi;
1739
1740       fprintf (dump_file, "\n");
1741
1742       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1743         {
1744           info = ver_info (data, i);
1745           if (info->inv_id)
1746             {
1747               fprintf (dump_file, "  ");
1748               print_generic_expr (dump_file, info->name, TDF_SLIM);
1749               fprintf (dump_file, " is invariant (%d)%s\n",
1750                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1751             }
1752         }
1753
1754       fprintf (dump_file, "\n");
1755     }
1756
1757   free (body);
1758 }
1759
1760 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1761    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1762    we are at the top-level of the processed address.  */
1763
1764 static tree
1765 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1766                 unsigned HOST_WIDE_INT *offset)
1767 {
1768   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1769   enum tree_code code;
1770   tree type, orig_type = TREE_TYPE (expr);
1771   unsigned HOST_WIDE_INT off0, off1, st;
1772   tree orig_expr = expr;
1773
1774   STRIP_NOPS (expr);
1775
1776   type = TREE_TYPE (expr);
1777   code = TREE_CODE (expr);
1778   *offset = 0;
1779
1780   switch (code)
1781     {
1782     case INTEGER_CST:
1783       if (!cst_and_fits_in_hwi (expr)
1784           || integer_zerop (expr))
1785         return orig_expr;
1786
1787       *offset = int_cst_value (expr);
1788       return build_int_cst (orig_type, 0);
1789
1790     case POINTER_PLUS_EXPR:
1791     case PLUS_EXPR:
1792     case MINUS_EXPR:
1793       op0 = TREE_OPERAND (expr, 0);
1794       op1 = TREE_OPERAND (expr, 1);
1795
1796       op0 = strip_offset_1 (op0, false, false, &off0);
1797       op1 = strip_offset_1 (op1, false, false, &off1);
1798
1799       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1800       if (op0 == TREE_OPERAND (expr, 0)
1801           && op1 == TREE_OPERAND (expr, 1))
1802         return orig_expr;
1803
1804       if (integer_zerop (op1))
1805         expr = op0;
1806       else if (integer_zerop (op0))
1807         {
1808           if (code == MINUS_EXPR)
1809             expr = fold_build1 (NEGATE_EXPR, type, op1);
1810           else
1811             expr = op1;
1812         }
1813       else
1814         expr = fold_build2 (code, type, op0, op1);
1815
1816       return fold_convert (orig_type, expr);
1817
1818     case ARRAY_REF:
1819       if (!inside_addr)
1820         return orig_expr;
1821
1822       step = array_ref_element_size (expr);
1823       if (!cst_and_fits_in_hwi (step))
1824         break;
1825
1826       st = int_cst_value (step);
1827       op1 = TREE_OPERAND (expr, 1);
1828       op1 = strip_offset_1 (op1, false, false, &off1);
1829       *offset = off1 * st;
1830
1831       if (top_compref
1832           && integer_zerop (op1))
1833         {
1834           /* Strip the component reference completely.  */
1835           op0 = TREE_OPERAND (expr, 0);
1836           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1837           *offset += off0;
1838           return op0;
1839         }
1840       break;
1841
1842     case COMPONENT_REF:
1843       if (!inside_addr)
1844         return orig_expr;
1845
1846       tmp = component_ref_field_offset (expr);
1847       if (top_compref
1848           && cst_and_fits_in_hwi (tmp))
1849         {
1850           /* Strip the component reference completely.  */
1851           op0 = TREE_OPERAND (expr, 0);
1852           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1853           *offset = off0 + int_cst_value (tmp);
1854           return op0;
1855         }
1856       break;
1857
1858     case ADDR_EXPR:
1859       op0 = TREE_OPERAND (expr, 0);
1860       op0 = strip_offset_1 (op0, true, true, &off0);
1861       *offset += off0;
1862
1863       if (op0 == TREE_OPERAND (expr, 0))
1864         return orig_expr;
1865
1866       expr = build_fold_addr_expr (op0);
1867       return fold_convert (orig_type, expr);
1868
1869     case INDIRECT_REF:
1870       inside_addr = false;
1871       break;
1872
1873     default:
1874       return orig_expr;
1875     }
1876
1877   /* Default handling of expressions for that we want to recurse into
1878      the first operand.  */
1879   op0 = TREE_OPERAND (expr, 0);
1880   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1881   *offset += off0;
1882
1883   if (op0 == TREE_OPERAND (expr, 0)
1884       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1885     return orig_expr;
1886
1887   expr = copy_node (expr);
1888   TREE_OPERAND (expr, 0) = op0;
1889   if (op1)
1890     TREE_OPERAND (expr, 1) = op1;
1891
1892   /* Inside address, we might strip the top level component references,
1893      thus changing type of the expression.  Handling of ADDR_EXPR
1894      will fix that.  */
1895   expr = fold_convert (orig_type, expr);
1896
1897   return expr;
1898 }
1899
1900 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1901
1902 static tree
1903 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1904 {
1905   return strip_offset_1 (expr, false, false, offset);
1906 }
1907
1908 /* Returns variant of TYPE that can be used as base for different uses.
1909    We return unsigned type with the same precision, which avoids problems
1910    with overflows.  */
1911
1912 static tree
1913 generic_type_for (tree type)
1914 {
1915   if (POINTER_TYPE_P (type))
1916     return unsigned_type_for (type);
1917
1918   if (TYPE_UNSIGNED (type))
1919     return type;
1920
1921   return unsigned_type_for (type);
1922 }
1923
1924 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1925    the bitmap to that we should store it.  */
1926
1927 static struct ivopts_data *fd_ivopts_data;
1928 static tree
1929 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1930 {
1931   bitmap *depends_on = (bitmap *) data;
1932   struct version_info *info;
1933
1934   if (TREE_CODE (*expr_p) != SSA_NAME)
1935     return NULL_TREE;
1936   info = name_info (fd_ivopts_data, *expr_p);
1937
1938   if (!info->inv_id || info->has_nonlin_use)
1939     return NULL_TREE;
1940
1941   if (!*depends_on)
1942     *depends_on = BITMAP_ALLOC (NULL);
1943   bitmap_set_bit (*depends_on, info->inv_id);
1944
1945   return NULL_TREE;
1946 }
1947
1948 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1949    position to POS.  If USE is not NULL, the candidate is set as related to
1950    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1951    replacement of the final value of the iv by a direct computation.  */
1952
1953 static struct iv_cand *
1954 add_candidate_1 (struct ivopts_data *data,
1955                  tree base, tree step, bool important, enum iv_position pos,
1956                  struct iv_use *use, tree incremented_at)
1957 {
1958   unsigned i;
1959   struct iv_cand *cand = NULL;
1960   tree type, orig_type;
1961
1962   if (base)
1963     {
1964       orig_type = TREE_TYPE (base);
1965       type = generic_type_for (orig_type);
1966       if (type != orig_type)
1967         {
1968           base = fold_convert (type, base);
1969           step = fold_convert (type, step);
1970         }
1971     }
1972
1973   for (i = 0; i < n_iv_cands (data); i++)
1974     {
1975       cand = iv_cand (data, i);
1976
1977       if (cand->pos != pos)
1978         continue;
1979
1980       if (cand->incremented_at != incremented_at)
1981         continue;
1982
1983       if (!cand->iv)
1984         {
1985           if (!base && !step)
1986             break;
1987
1988           continue;
1989         }
1990
1991       if (!base && !step)
1992         continue;
1993
1994       if (operand_equal_p (base, cand->iv->base, 0)
1995           && operand_equal_p (step, cand->iv->step, 0))
1996         break;
1997     }
1998
1999   if (i == n_iv_cands (data))
2000     {
2001       cand = XCNEW (struct iv_cand);
2002       cand->id = i;
2003
2004       if (!base && !step)
2005         cand->iv = NULL;
2006       else
2007         cand->iv = alloc_iv (base, step);
2008
2009       cand->pos = pos;
2010       if (pos != IP_ORIGINAL && cand->iv)
2011         {
2012           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2013           cand->var_after = cand->var_before;
2014         }
2015       cand->important = important;
2016       cand->incremented_at = incremented_at;
2017       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2018
2019       if (step
2020           && TREE_CODE (step) != INTEGER_CST)
2021         {
2022           fd_ivopts_data = data;
2023           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2024         }
2025
2026       if (dump_file && (dump_flags & TDF_DETAILS))
2027         dump_cand (dump_file, cand);
2028     }
2029
2030   if (important && !cand->important)
2031     {
2032       cand->important = true;
2033       if (dump_file && (dump_flags & TDF_DETAILS))
2034         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2035     }
2036
2037   if (use)
2038     {
2039       bitmap_set_bit (use->related_cands, i);
2040       if (dump_file && (dump_flags & TDF_DETAILS))
2041         fprintf (dump_file, "Candidate %d is related to use %d\n",
2042                  cand->id, use->id);
2043     }
2044
2045   return cand;
2046 }
2047
2048 /* Returns true if incrementing the induction variable at the end of the LOOP
2049    is allowed.
2050
2051    The purpose is to avoid splitting latch edge with a biv increment, thus
2052    creating a jump, possibly confusing other optimization passes and leaving
2053    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2054    is not available (so we do not have a better alternative), or if the latch
2055    edge is already nonempty.  */
2056
2057 static bool
2058 allow_ip_end_pos_p (struct loop *loop)
2059 {
2060   if (!ip_normal_pos (loop))
2061     return true;
2062
2063   if (!empty_block_p (ip_end_pos (loop)))
2064     return true;
2065
2066   return false;
2067 }
2068
2069 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2070    position to POS.  If USE is not NULL, the candidate is set as related to
2071    it.  The candidate computation is scheduled on all available positions.  */
2072
2073 static void
2074 add_candidate (struct ivopts_data *data,
2075                tree base, tree step, bool important, struct iv_use *use)
2076 {
2077   if (ip_normal_pos (data->current_loop))
2078     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2079   if (ip_end_pos (data->current_loop)
2080       && allow_ip_end_pos_p (data->current_loop))
2081     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2082 }
2083
2084 /* Add a standard "0 + 1 * iteration" iv candidate for a
2085    type with SIZE bits.  */
2086
2087 static void
2088 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2089                                      unsigned int size)
2090 {
2091   tree type = lang_hooks.types.type_for_size (size, true);
2092   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2093                  true, NULL);
2094 }
2095
2096 /* Adds standard iv candidates.  */
2097
2098 static void
2099 add_standard_iv_candidates (struct ivopts_data *data)
2100 {
2101   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2102
2103   /* The same for a double-integer type if it is still fast enough.  */
2104   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2105     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2106 }
2107
2108
2109 /* Adds candidates bases on the old induction variable IV.  */
2110
2111 static void
2112 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2113 {
2114   tree phi, def;
2115   struct iv_cand *cand;
2116
2117   add_candidate (data, iv->base, iv->step, true, NULL);
2118
2119   /* The same, but with initial value zero.  */
2120   add_candidate (data,
2121                  build_int_cst (TREE_TYPE (iv->base), 0),
2122                  iv->step, true, NULL);
2123
2124   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2125   if (TREE_CODE (phi) == PHI_NODE)
2126     {
2127       /* Additionally record the possibility of leaving the original iv
2128          untouched.  */
2129       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2130       cand = add_candidate_1 (data,
2131                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2132                               SSA_NAME_DEF_STMT (def));
2133       cand->var_before = iv->ssa_name;
2134       cand->var_after = def;
2135     }
2136 }
2137
2138 /* Adds candidates based on the old induction variables.  */
2139
2140 static void
2141 add_old_ivs_candidates (struct ivopts_data *data)
2142 {
2143   unsigned i;
2144   struct iv *iv;
2145   bitmap_iterator bi;
2146
2147   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2148     {
2149       iv = ver_info (data, i)->iv;
2150       if (iv && iv->biv_p && !integer_zerop (iv->step))
2151         add_old_iv_candidates (data, iv);
2152     }
2153 }
2154
2155 /* Adds candidates based on the value of the induction variable IV and USE.  */
2156
2157 static void
2158 add_iv_value_candidates (struct ivopts_data *data,
2159                          struct iv *iv, struct iv_use *use)
2160 {
2161   unsigned HOST_WIDE_INT offset;
2162   tree base;
2163
2164   add_candidate (data, iv->base, iv->step, false, use);
2165
2166   /* The same, but with initial value zero.  Make such variable important,
2167      since it is generic enough so that possibly many uses may be based
2168      on it.  */
2169   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2170                  iv->step, true, use);
2171
2172   /* Third, try removing the constant offset.  */
2173   base = strip_offset (iv->base, &offset);
2174   if (offset)
2175     add_candidate (data, base, iv->step, false, use);
2176 }
2177
2178 /* Adds candidates based on the uses.  */
2179
2180 static void
2181 add_derived_ivs_candidates (struct ivopts_data *data)
2182 {
2183   unsigned i;
2184
2185   for (i = 0; i < n_iv_uses (data); i++)
2186     {
2187       struct iv_use *use = iv_use (data, i);
2188
2189       if (!use)
2190         continue;
2191
2192       switch (use->type)
2193         {
2194         case USE_NONLINEAR_EXPR:
2195         case USE_COMPARE:
2196         case USE_ADDRESS:
2197           /* Just add the ivs based on the value of the iv used here.  */
2198           add_iv_value_candidates (data, use->iv, use);
2199           break;
2200
2201         default:
2202           gcc_unreachable ();
2203         }
2204     }
2205 }
2206
2207 /* Record important candidates and add them to related_cands bitmaps
2208    if needed.  */
2209
2210 static void
2211 record_important_candidates (struct ivopts_data *data)
2212 {
2213   unsigned i;
2214   struct iv_use *use;
2215
2216   for (i = 0; i < n_iv_cands (data); i++)
2217     {
2218       struct iv_cand *cand = iv_cand (data, i);
2219
2220       if (cand->important)
2221         bitmap_set_bit (data->important_candidates, i);
2222     }
2223
2224   data->consider_all_candidates = (n_iv_cands (data)
2225                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2226
2227   if (data->consider_all_candidates)
2228     {
2229       /* We will not need "related_cands" bitmaps in this case,
2230          so release them to decrease peak memory consumption.  */
2231       for (i = 0; i < n_iv_uses (data); i++)
2232         {
2233           use = iv_use (data, i);
2234           BITMAP_FREE (use->related_cands);
2235         }
2236     }
2237   else
2238     {
2239       /* Add important candidates to the related_cands bitmaps.  */
2240       for (i = 0; i < n_iv_uses (data); i++)
2241         bitmap_ior_into (iv_use (data, i)->related_cands,
2242                          data->important_candidates);
2243     }
2244 }
2245
2246 /* Finds the candidates for the induction variables.  */
2247
2248 static void
2249 find_iv_candidates (struct ivopts_data *data)
2250 {
2251   /* Add commonly used ivs.  */
2252   add_standard_iv_candidates (data);
2253
2254   /* Add old induction variables.  */
2255   add_old_ivs_candidates (data);
2256
2257   /* Add induction variables derived from uses.  */
2258   add_derived_ivs_candidates (data);
2259
2260   /* Record the important candidates.  */
2261   record_important_candidates (data);
2262 }
2263
2264 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2265    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2266    we allocate a simple list to every use.  */
2267
2268 static void
2269 alloc_use_cost_map (struct ivopts_data *data)
2270 {
2271   unsigned i, size, s, j;
2272
2273   for (i = 0; i < n_iv_uses (data); i++)
2274     {
2275       struct iv_use *use = iv_use (data, i);
2276       bitmap_iterator bi;
2277
2278       if (data->consider_all_candidates)
2279         size = n_iv_cands (data);
2280       else
2281         {
2282           s = 0;
2283           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2284             {
2285               s++;
2286             }
2287
2288           /* Round up to the power of two, so that moduling by it is fast.  */
2289           for (size = 1; size < s; size <<= 1)
2290             continue;
2291         }
2292
2293       use->n_map_members = size;
2294       use->cost_map = XCNEWVEC (struct cost_pair, size);
2295     }
2296 }
2297
2298 /* Returns description of computation cost of expression whose runtime
2299    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2300
2301 static comp_cost
2302 new_cost (unsigned runtime, unsigned complexity)
2303 {
2304   comp_cost cost;
2305
2306   cost.cost = runtime;
2307   cost.complexity = complexity;
2308
2309   return cost;
2310 }
2311
2312 /* Adds costs COST1 and COST2.  */
2313
2314 static comp_cost
2315 add_costs (comp_cost cost1, comp_cost cost2)
2316 {
2317   cost1.cost += cost2.cost;
2318   cost1.complexity += cost2.complexity;
2319
2320   return cost1;
2321 }
2322 /* Subtracts costs COST1 and COST2.  */
2323
2324 static comp_cost
2325 sub_costs (comp_cost cost1, comp_cost cost2)
2326 {
2327   cost1.cost -= cost2.cost;
2328   cost1.complexity -= cost2.complexity;
2329
2330   return cost1;
2331 }
2332
2333 /* Returns a negative number if COST1 < COST2, a positive number if
2334    COST1 > COST2, and 0 if COST1 = COST2.  */
2335
2336 static int
2337 compare_costs (comp_cost cost1, comp_cost cost2)
2338 {
2339   if (cost1.cost == cost2.cost)
2340     return cost1.complexity - cost2.complexity;
2341
2342   return cost1.cost - cost2.cost;
2343 }
2344
2345 /* Returns true if COST is infinite.  */
2346
2347 static bool
2348 infinite_cost_p (comp_cost cost)
2349 {
2350   return cost.cost == INFTY;
2351 }
2352
2353 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2354    on invariants DEPENDS_ON and that the value used in expressing it
2355    is VALUE.*/
2356
2357 static void
2358 set_use_iv_cost (struct ivopts_data *data,
2359                  struct iv_use *use, struct iv_cand *cand,
2360                  comp_cost cost, bitmap depends_on, tree value)
2361 {
2362   unsigned i, s;
2363
2364   if (infinite_cost_p (cost))
2365     {
2366       BITMAP_FREE (depends_on);
2367       return;
2368     }
2369
2370   if (data->consider_all_candidates)
2371     {
2372       use->cost_map[cand->id].cand = cand;
2373       use->cost_map[cand->id].cost = cost;
2374       use->cost_map[cand->id].depends_on = depends_on;
2375       use->cost_map[cand->id].value = value;
2376       return;
2377     }
2378
2379   /* n_map_members is a power of two, so this computes modulo.  */
2380   s = cand->id & (use->n_map_members - 1);
2381   for (i = s; i < use->n_map_members; i++)
2382     if (!use->cost_map[i].cand)
2383       goto found;
2384   for (i = 0; i < s; i++)
2385     if (!use->cost_map[i].cand)
2386       goto found;
2387
2388   gcc_unreachable ();
2389
2390 found:
2391   use->cost_map[i].cand = cand;
2392   use->cost_map[i].cost = cost;
2393   use->cost_map[i].depends_on = depends_on;
2394   use->cost_map[i].value = value;
2395 }
2396
2397 /* Gets cost of (USE, CANDIDATE) pair.  */
2398
2399 static struct cost_pair *
2400 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2401                  struct iv_cand *cand)
2402 {
2403   unsigned i, s;
2404   struct cost_pair *ret;
2405
2406   if (!cand)
2407     return NULL;
2408
2409   if (data->consider_all_candidates)
2410     {
2411       ret = use->cost_map + cand->id;
2412       if (!ret->cand)
2413         return NULL;
2414
2415       return ret;
2416     }
2417
2418   /* n_map_members is a power of two, so this computes modulo.  */
2419   s = cand->id & (use->n_map_members - 1);
2420   for (i = s; i < use->n_map_members; i++)
2421     if (use->cost_map[i].cand == cand)
2422       return use->cost_map + i;
2423
2424   for (i = 0; i < s; i++)
2425     if (use->cost_map[i].cand == cand)
2426       return use->cost_map + i;
2427
2428   return NULL;
2429 }
2430
2431 /* Returns estimate on cost of computing SEQ.  */
2432
2433 static unsigned
2434 seq_cost (rtx seq)
2435 {
2436   unsigned cost = 0;
2437   rtx set;
2438
2439   for (; seq; seq = NEXT_INSN (seq))
2440     {
2441       set = single_set (seq);
2442       if (set)
2443         cost += rtx_cost (set, SET);
2444       else
2445         cost++;
2446     }
2447
2448   return cost;
2449 }
2450
2451 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2452 static rtx
2453 produce_memory_decl_rtl (tree obj, int *regno)
2454 {
2455   rtx x;
2456
2457   gcc_assert (obj);
2458   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2459     {
2460       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2461       x = gen_rtx_SYMBOL_REF (Pmode, name);
2462       SET_SYMBOL_REF_DECL (x, obj);
2463       x = gen_rtx_MEM (DECL_MODE (obj), x);
2464       targetm.encode_section_info (obj, x, true);
2465     }
2466   else
2467     {
2468       x = gen_raw_REG (Pmode, (*regno)++);
2469       x = gen_rtx_MEM (DECL_MODE (obj), x);
2470     }
2471
2472   return x;
2473 }
2474
2475 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2476    walk_tree.  DATA contains the actual fake register number.  */
2477
2478 static tree
2479 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2480 {
2481   tree obj = NULL_TREE;
2482   rtx x = NULL_RTX;
2483   int *regno = (int *) data;
2484
2485   switch (TREE_CODE (*expr_p))
2486     {
2487     case ADDR_EXPR:
2488       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2489            handled_component_p (*expr_p);
2490            expr_p = &TREE_OPERAND (*expr_p, 0))
2491         continue;
2492       obj = *expr_p;
2493       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2494         x = produce_memory_decl_rtl (obj, regno);
2495       break;
2496
2497     case SSA_NAME:
2498       *ws = 0;
2499       obj = SSA_NAME_VAR (*expr_p);
2500       if (!DECL_RTL_SET_P (obj))
2501         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2502       break;
2503
2504     case VAR_DECL:
2505     case PARM_DECL:
2506     case RESULT_DECL:
2507       *ws = 0;
2508       obj = *expr_p;
2509
2510       if (DECL_RTL_SET_P (obj))
2511         break;
2512
2513       if (DECL_MODE (obj) == BLKmode)
2514         x = produce_memory_decl_rtl (obj, regno);
2515       else
2516         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2517
2518       break;
2519
2520     default:
2521       break;
2522     }
2523
2524   if (x)
2525     {
2526       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2527       SET_DECL_RTL (obj, x);
2528     }
2529
2530   return NULL_TREE;
2531 }
2532
2533 /* Determines cost of the computation of EXPR.  */
2534
2535 static unsigned
2536 computation_cost (tree expr)
2537 {
2538   rtx seq, rslt;
2539   tree type = TREE_TYPE (expr);
2540   unsigned cost;
2541   /* Avoid using hard regs in ways which may be unsupported.  */
2542   int regno = LAST_VIRTUAL_REGISTER + 1;
2543
2544   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2545   start_sequence ();
2546   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2547   seq = get_insns ();
2548   end_sequence ();
2549
2550   cost = seq_cost (seq);
2551   if (MEM_P (rslt))
2552     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2553
2554   return cost;
2555 }
2556
2557 /* Returns variable containing the value of candidate CAND at statement AT.  */
2558
2559 static tree
2560 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2561 {
2562   if (stmt_after_increment (loop, cand, stmt))
2563     return cand->var_after;
2564   else
2565     return cand->var_before;
2566 }
2567
2568 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2569    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2570
2571 int
2572 tree_int_cst_sign_bit (const_tree t)
2573 {
2574   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2575   unsigned HOST_WIDE_INT w;
2576
2577   if (bitno < HOST_BITS_PER_WIDE_INT)
2578     w = TREE_INT_CST_LOW (t);
2579   else
2580     {
2581       w = TREE_INT_CST_HIGH (t);
2582       bitno -= HOST_BITS_PER_WIDE_INT;
2583     }
2584
2585   return (w >> bitno) & 1;
2586 }
2587
2588 /* If we can prove that TOP = cst * BOT for some constant cst,
2589    store cst to MUL and return true.  Otherwise return false.
2590    The returned value is always sign-extended, regardless of the
2591    signedness of TOP and BOT.  */
2592
2593 static bool
2594 constant_multiple_of (tree top, tree bot, double_int *mul)
2595 {
2596   tree mby;
2597   enum tree_code code;
2598   double_int res, p0, p1;
2599   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2600
2601   STRIP_NOPS (top);
2602   STRIP_NOPS (bot);
2603
2604   if (operand_equal_p (top, bot, 0))
2605     {
2606       *mul = double_int_one;
2607       return true;
2608     }
2609
2610   code = TREE_CODE (top);
2611   switch (code)
2612     {
2613     case MULT_EXPR:
2614       mby = TREE_OPERAND (top, 1);
2615       if (TREE_CODE (mby) != INTEGER_CST)
2616         return false;
2617
2618       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2619         return false;
2620
2621       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2622                               precision);
2623       return true;
2624
2625     case PLUS_EXPR:
2626     case MINUS_EXPR:
2627       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2628           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2629         return false;
2630
2631       if (code == MINUS_EXPR)
2632         p1 = double_int_neg (p1);
2633       *mul = double_int_sext (double_int_add (p0, p1), precision);
2634       return true;
2635
2636     case INTEGER_CST:
2637       if (TREE_CODE (bot) != INTEGER_CST)
2638         return false;
2639
2640       p0 = double_int_sext (tree_to_double_int (top), precision);
2641       p1 = double_int_sext (tree_to_double_int (bot), precision);
2642       if (double_int_zero_p (p1))
2643         return false;
2644       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2645                               precision);
2646       return double_int_zero_p (res);
2647
2648     default:
2649       return false;
2650     }
2651 }
2652
2653 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2654    same precision that is at least as wide as the precision of TYPE, stores
2655    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2656    type of A and B.  */
2657
2658 static tree
2659 determine_common_wider_type (tree *a, tree *b)
2660 {
2661   tree wider_type = NULL;
2662   tree suba, subb;
2663   tree atype = TREE_TYPE (*a);
2664
2665   if ((TREE_CODE (*a) == NOP_EXPR
2666        || TREE_CODE (*a) == CONVERT_EXPR))
2667     {
2668       suba = TREE_OPERAND (*a, 0);
2669       wider_type = TREE_TYPE (suba);
2670       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2671         return atype;
2672     }
2673   else
2674     return atype;
2675
2676   if ((TREE_CODE (*b) == NOP_EXPR
2677        || TREE_CODE (*b) == CONVERT_EXPR))
2678     {
2679       subb = TREE_OPERAND (*b, 0);
2680       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2681         return atype;
2682     }
2683   else
2684     return atype;
2685
2686   *a = suba;
2687   *b = subb;
2688   return wider_type;
2689 }
2690
2691 /* Determines the expression by that USE is expressed from induction variable
2692    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2693    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2694
2695 static bool
2696 get_computation_aff (struct loop *loop,
2697                      struct iv_use *use, struct iv_cand *cand, tree at,
2698                      struct affine_tree_combination *aff)
2699 {
2700   tree ubase = use->iv->base;
2701   tree ustep = use->iv->step;
2702   tree cbase = cand->iv->base;
2703   tree cstep = cand->iv->step, cstep_common;
2704   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2705   tree common_type, var;
2706   tree uutype;
2707   aff_tree cbase_aff, var_aff;
2708   double_int rat;
2709
2710   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2711     {
2712       /* We do not have a precision to express the values of use.  */
2713       return false;
2714     }
2715
2716   var = var_at_stmt (loop, cand, at);
2717   uutype = unsigned_type_for (utype);
2718
2719   /* If the conversion is not noop, perform it.  */
2720   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2721     {
2722       cstep = fold_convert (uutype, cstep);
2723       cbase = fold_convert (uutype, cbase);
2724       var = fold_convert (uutype, var);
2725     }
2726
2727   if (!constant_multiple_of (ustep, cstep, &rat))
2728     return false;
2729
2730   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2731      type, we achieve better folding by computing their difference in this
2732      wider type, and cast the result to UUTYPE.  We do not need to worry about
2733      overflows, as all the arithmetics will in the end be performed in UUTYPE
2734      anyway.  */
2735   common_type = determine_common_wider_type (&ubase, &cbase);
2736
2737   /* use = ubase - ratio * cbase + ratio * var.  */
2738   tree_to_aff_combination (ubase, common_type, aff);
2739   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2740   tree_to_aff_combination (var, uutype, &var_aff);
2741
2742   /* We need to shift the value if we are after the increment.  */
2743   if (stmt_after_increment (loop, cand, at))
2744     {
2745       aff_tree cstep_aff;
2746
2747       if (common_type != uutype)
2748         cstep_common = fold_convert (common_type, cstep);
2749       else
2750         cstep_common = cstep;
2751
2752       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2753       aff_combination_add (&cbase_aff, &cstep_aff);
2754     }
2755
2756   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2757   aff_combination_add (aff, &cbase_aff);
2758   if (common_type != uutype)
2759     aff_combination_convert (aff, uutype);
2760
2761   aff_combination_scale (&var_aff, rat);
2762   aff_combination_add (aff, &var_aff);
2763
2764   return true;
2765 }
2766
2767 /* Determines the expression by that USE is expressed from induction variable
2768    CAND at statement AT in LOOP.  The computation is unshared.  */
2769
2770 static tree
2771 get_computation_at (struct loop *loop,
2772                     struct iv_use *use, struct iv_cand *cand, tree at)
2773 {
2774   aff_tree aff;
2775   tree type = TREE_TYPE (use->iv->base);
2776
2777   if (!get_computation_aff (loop, use, cand, at, &aff))
2778     return NULL_TREE;
2779   unshare_aff_combination (&aff);
2780   return fold_convert (type, aff_combination_to_tree (&aff));
2781 }
2782
2783 /* Determines the expression by that USE is expressed from induction variable
2784    CAND in LOOP.  The computation is unshared.  */
2785
2786 static tree
2787 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2788 {
2789   return get_computation_at (loop, use, cand, use->stmt);
2790 }
2791
2792 /* Returns cost of addition in MODE.  */
2793
2794 static unsigned
2795 add_cost (enum machine_mode mode)
2796 {
2797   static unsigned costs[NUM_MACHINE_MODES];
2798   rtx seq;
2799   unsigned cost;
2800
2801   if (costs[mode])
2802     return costs[mode];
2803
2804   start_sequence ();
2805   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2806                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2807                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2808                  NULL_RTX);
2809   seq = get_insns ();
2810   end_sequence ();
2811
2812   cost = seq_cost (seq);
2813   if (!cost)
2814     cost = 1;
2815
2816   costs[mode] = cost;
2817
2818   if (dump_file && (dump_flags & TDF_DETAILS))
2819     fprintf (dump_file, "Addition in %s costs %d\n",
2820              GET_MODE_NAME (mode), cost);
2821   return cost;
2822 }
2823
2824 /* Entry in a hashtable of already known costs for multiplication.  */
2825 struct mbc_entry
2826 {
2827   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2828   enum machine_mode mode;       /* In mode.  */
2829   unsigned cost;                /* The cost.  */
2830 };
2831
2832 /* Counts hash value for the ENTRY.  */
2833
2834 static hashval_t
2835 mbc_entry_hash (const void *entry)
2836 {
2837   const struct mbc_entry *e = (const struct mbc_entry *) entry;
2838
2839   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2840 }
2841
2842 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2843
2844 static int
2845 mbc_entry_eq (const void *entry1, const void *entry2)
2846 {
2847   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2848   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2849
2850   return (e1->mode == e2->mode
2851           && e1->cst == e2->cst);
2852 }
2853
2854 /* Returns cost of multiplication by constant CST in MODE.  */
2855
2856 unsigned
2857 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2858 {
2859   static htab_t costs;
2860   struct mbc_entry **cached, act;
2861   rtx seq;
2862   unsigned cost;
2863
2864   if (!costs)
2865     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2866
2867   act.mode = mode;
2868   act.cst = cst;
2869   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2870   if (*cached)
2871     return (*cached)->cost;
2872
2873   *cached = XNEW (struct mbc_entry);
2874   (*cached)->mode = mode;
2875   (*cached)->cst = cst;
2876
2877   start_sequence ();
2878   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2879                gen_int_mode (cst, mode), NULL_RTX, 0);
2880   seq = get_insns ();
2881   end_sequence ();
2882
2883   cost = seq_cost (seq);
2884
2885   if (dump_file && (dump_flags & TDF_DETAILS))
2886     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2887              (int) cst, GET_MODE_NAME (mode), cost);
2888
2889   (*cached)->cost = cost;
2890
2891   return cost;
2892 }
2893
2894 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2895    validity for a memory reference accessing memory of mode MODE.  */
2896
2897 bool
2898 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2899 {
2900 #define MAX_RATIO 128
2901   static sbitmap valid_mult[MAX_MACHINE_MODE];
2902
2903   if (!valid_mult[mode])
2904     {
2905       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2906       rtx addr;
2907       HOST_WIDE_INT i;
2908
2909       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2910       sbitmap_zero (valid_mult[mode]);
2911       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2912       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2913         {
2914           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2915           if (memory_address_p (mode, addr))
2916             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2917         }
2918
2919       if (dump_file && (dump_flags & TDF_DETAILS))
2920         {
2921           fprintf (dump_file, "  allowed multipliers:");
2922           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2923             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2924               fprintf (dump_file, " %d", (int) i);
2925           fprintf (dump_file, "\n");
2926           fprintf (dump_file, "\n");
2927         }
2928     }
2929
2930   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2931     return false;
2932
2933   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2934 }
2935
2936 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2937    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2938    variable is omitted.  Compute the cost for a memory reference that accesses
2939    a memory location of mode MEM_MODE.
2940
2941    TODO -- there must be some better way.  This all is quite crude.  */
2942
2943 static comp_cost
2944 get_address_cost (bool symbol_present, bool var_present,
2945                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2946                   enum machine_mode mem_mode)
2947 {
2948   static bool initialized[MAX_MACHINE_MODE];
2949   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2950   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2951   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2952   unsigned cost, acost, complexity;
2953   bool offset_p, ratio_p;
2954   HOST_WIDE_INT s_offset;
2955   unsigned HOST_WIDE_INT mask;
2956   unsigned bits;
2957
2958   if (!initialized[mem_mode])
2959     {
2960       HOST_WIDE_INT i;
2961       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2962       int old_cse_not_expected;
2963       unsigned sym_p, var_p, off_p, rat_p, add_c;
2964       rtx seq, addr, base;
2965       rtx reg0, reg1;
2966
2967       initialized[mem_mode] = true;
2968
2969       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2970
2971       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2972       for (i = start; i <= 1 << 20; i <<= 1)
2973         {
2974           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2975           if (!memory_address_p (mem_mode, addr))
2976             break;
2977         }
2978       max_offset[mem_mode] = i == start ? 0 : i >> 1;
2979       off[mem_mode] = max_offset[mem_mode];
2980
2981       for (i = start; i <= 1 << 20; i <<= 1)
2982         {
2983           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2984           if (!memory_address_p (mem_mode, addr))
2985             break;
2986         }
2987       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2988
2989       if (dump_file && (dump_flags & TDF_DETAILS))
2990         {
2991           fprintf (dump_file, "get_address_cost:\n");
2992           fprintf (dump_file, "  min offset %s %d\n",
2993                    GET_MODE_NAME (mem_mode),
2994                    (int) min_offset[mem_mode]);
2995           fprintf (dump_file, "  max offset %s %d\n",
2996                    GET_MODE_NAME (mem_mode),
2997                    (int) max_offset[mem_mode]);
2998         }
2999
3000       rat[mem_mode] = 1;
3001       for (i = 2; i <= MAX_RATIO; i++)
3002         if (multiplier_allowed_in_address_p (i, mem_mode))
3003           {
3004             rat[mem_mode] = i;
3005             break;
3006           }
3007
3008       /* Compute the cost of various addressing modes.  */
3009       acost = 0;
3010       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3011       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3012
3013       for (i = 0; i < 16; i++)
3014         {
3015           sym_p = i & 1;
3016           var_p = (i >> 1) & 1;
3017           off_p = (i >> 2) & 1;
3018           rat_p = (i >> 3) & 1;
3019
3020           addr = reg0;
3021           if (rat_p)
3022             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3023                                    gen_int_mode (rat[mem_mode], Pmode));
3024
3025           if (var_p)
3026             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3027
3028           if (sym_p)
3029             {
3030               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3031               /* ??? We can run into trouble with some backends by presenting
3032                  it with symbols which havn't been properly passed through
3033                  targetm.encode_section_info.  By setting the local bit, we
3034                  enhance the probability of things working.  */
3035               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3036
3037               if (off_p)
3038                 base = gen_rtx_fmt_e (CONST, Pmode,
3039                                       gen_rtx_fmt_ee (PLUS, Pmode,
3040                                                       base,
3041                                                       gen_int_mode (off[mem_mode],
3042                                                                     Pmode)));
3043             }
3044           else if (off_p)
3045             base = gen_int_mode (off[mem_mode], Pmode);
3046           else
3047             base = NULL_RTX;
3048
3049           if (base)
3050             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3051
3052           start_sequence ();
3053           /* To avoid splitting addressing modes, pretend that no cse will
3054              follow.  */
3055           old_cse_not_expected = cse_not_expected;
3056           cse_not_expected = true;
3057           addr = memory_address (mem_mode, addr);
3058           cse_not_expected = old_cse_not_expected;
3059           seq = get_insns ();
3060           end_sequence ();
3061
3062           acost = seq_cost (seq);
3063           acost += address_cost (addr, mem_mode);
3064
3065           if (!acost)
3066             acost = 1;
3067           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3068         }
3069
3070       /* On some targets, it is quite expensive to load symbol to a register,
3071          which makes addresses that contain symbols look much more expensive.
3072          However, the symbol will have to be loaded in any case before the
3073          loop (and quite likely we have it in register already), so it does not
3074          make much sense to penalize them too heavily.  So make some final
3075          tweaks for the SYMBOL_PRESENT modes:
3076
3077          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3078          var is cheaper, use this mode with small penalty.
3079          If VAR_PRESENT is true, try whether the mode with
3080          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3081          if this is the case, use it.  */
3082       add_c = add_cost (Pmode);
3083       for (i = 0; i < 8; i++)
3084         {
3085           var_p = i & 1;
3086           off_p = (i >> 1) & 1;
3087           rat_p = (i >> 2) & 1;
3088
3089           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3090           if (var_p)
3091             acost += add_c;
3092
3093           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3094             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3095         }
3096
3097       if (dump_file && (dump_flags & TDF_DETAILS))
3098         {
3099           fprintf (dump_file, "Address costs:\n");
3100
3101           for (i = 0; i < 16; i++)
3102             {
3103               sym_p = i & 1;
3104               var_p = (i >> 1) & 1;
3105               off_p = (i >> 2) & 1;
3106               rat_p = (i >> 3) & 1;
3107
3108               fprintf (dump_file, "  ");
3109               if (sym_p)
3110                 fprintf (dump_file, "sym + ");
3111               if (var_p)
3112                 fprintf (dump_file, "var + ");
3113               if (off_p)
3114                 fprintf (dump_file, "cst + ");
3115               if (rat_p)
3116                 fprintf (dump_file, "rat * ");
3117
3118               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3119               fprintf (dump_file, "index costs %d\n", acost);
3120             }
3121           fprintf (dump_file, "\n");
3122         }
3123     }
3124
3125   bits = GET_MODE_BITSIZE (Pmode);
3126   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3127   offset &= mask;
3128   if ((offset >> (bits - 1) & 1))
3129     offset |= ~mask;
3130   s_offset = offset;
3131
3132   cost = 0;
3133   offset_p = (s_offset != 0
3134               && min_offset[mem_mode] <= s_offset
3135               && s_offset <= max_offset[mem_mode]);
3136   ratio_p = (ratio != 1
3137              && multiplier_allowed_in_address_p (ratio, mem_mode));
3138
3139   if (ratio != 1 && !ratio_p)
3140     cost += multiply_by_cost (ratio, Pmode);
3141
3142   if (s_offset && !offset_p && !symbol_present)
3143     cost += add_cost (Pmode);
3144
3145   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3146   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3147   return new_cost (cost + acost, complexity);
3148 }
3149
3150 /* Estimates cost of forcing expression EXPR into a variable.  */
3151
3152 static comp_cost
3153 force_expr_to_var_cost (tree expr)
3154 {
3155   static bool costs_initialized = false;
3156   static unsigned integer_cost;
3157   static unsigned symbol_cost;
3158   static unsigned address_cost;
3159   tree op0, op1;
3160   comp_cost cost0, cost1, cost;
3161   enum machine_mode mode;
3162
3163   if (!costs_initialized)
3164     {
3165       tree type = build_pointer_type (integer_type_node);
3166       tree var, addr;
3167       rtx x;
3168
3169       var = create_tmp_var_raw (integer_type_node, "test_var");
3170       TREE_STATIC (var) = 1;
3171       x = produce_memory_decl_rtl (var, NULL);
3172       SET_DECL_RTL (var, x);
3173
3174       integer_cost = computation_cost (build_int_cst (integer_type_node,
3175                                                       2000));
3176
3177       addr = build1 (ADDR_EXPR, type, var);
3178       symbol_cost = computation_cost (addr) + 1;
3179
3180       address_cost
3181         = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3182                                     addr,
3183                                     build_int_cst (sizetype, 2000))) + 1;
3184       if (dump_file && (dump_flags & TDF_DETAILS))
3185         {
3186           fprintf (dump_file, "force_expr_to_var_cost:\n");
3187           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3188           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3189           fprintf (dump_file, "  address %d\n", (int) address_cost);
3190           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3191           fprintf (dump_file, "\n");
3192         }
3193
3194       costs_initialized = true;
3195     }
3196
3197   STRIP_NOPS (expr);
3198
3199   if (SSA_VAR_P (expr))
3200     return zero_cost;
3201
3202   if (TREE_INVARIANT (expr))
3203     {
3204       if (TREE_CODE (expr) == INTEGER_CST)
3205         return new_cost (integer_cost, 0);
3206
3207       if (TREE_CODE (expr) == ADDR_EXPR)
3208         {
3209           tree obj = TREE_OPERAND (expr, 0);
3210
3211           if (TREE_CODE (obj) == VAR_DECL
3212               || TREE_CODE (obj) == PARM_DECL
3213               || TREE_CODE (obj) == RESULT_DECL)
3214             return new_cost (symbol_cost, 0);
3215         }
3216
3217       return new_cost (address_cost, 0);
3218     }
3219
3220   switch (TREE_CODE (expr))
3221     {
3222     case POINTER_PLUS_EXPR:
3223     case PLUS_EXPR:
3224     case MINUS_EXPR:
3225     case MULT_EXPR:
3226       op0 = TREE_OPERAND (expr, 0);
3227       op1 = TREE_OPERAND (expr, 1);
3228       STRIP_NOPS (op0);
3229       STRIP_NOPS (op1);
3230
3231       if (is_gimple_val (op0))
3232         cost0 = zero_cost;
3233       else
3234         cost0 = force_expr_to_var_cost (op0);
3235
3236       if (is_gimple_val (op1))
3237         cost1 = zero_cost;
3238       else
3239         cost1 = force_expr_to_var_cost (op1);
3240
3241       break;
3242
3243     default:
3244       /* Just an arbitrary value, FIXME.  */
3245       return new_cost (target_spill_cost, 0);
3246     }
3247
3248   mode = TYPE_MODE (TREE_TYPE (expr));
3249   switch (TREE_CODE (expr))
3250     {
3251     case POINTER_PLUS_EXPR:
3252     case PLUS_EXPR:
3253     case MINUS_EXPR:
3254       cost = new_cost (add_cost (mode), 0);
3255       break;
3256
3257     case MULT_EXPR:
3258       if (cst_and_fits_in_hwi (op0))
3259         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3260       else if (cst_and_fits_in_hwi (op1))
3261         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3262       else
3263         return new_cost (target_spill_cost, 0);
3264       break;
3265
3266     default:
3267       gcc_unreachable ();
3268     }
3269
3270   cost = add_costs (cost, cost0);
3271   cost = add_costs (cost, cost1);
3272
3273   /* Bound the cost by target_spill_cost.  The parts of complicated
3274      computations often are either loop invariant or at least can
3275      be shared between several iv uses, so letting this grow without
3276      limits would not give reasonable results.  */
3277   if (cost.cost > target_spill_cost)
3278     cost.cost = target_spill_cost;
3279
3280   return cost;
3281 }
3282
3283 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3284    invariants the computation depends on.  */
3285
3286 static comp_cost
3287 force_var_cost (struct ivopts_data *data,
3288                 tree expr, bitmap *depends_on)
3289 {
3290   if (depends_on)
3291     {
3292       fd_ivopts_data = data;
3293       walk_tree (&expr, find_depends, depends_on, NULL);
3294     }
3295
3296   return force_expr_to_var_cost (expr);
3297 }
3298
3299 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3300    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3301    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3302    invariants the computation depends on.  */
3303
3304 static comp_cost
3305 split_address_cost (struct ivopts_data *data,
3306                     tree addr, bool *symbol_present, bool *var_present,
3307                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3308 {
3309   tree core;
3310   HOST_WIDE_INT bitsize;
3311   HOST_WIDE_INT bitpos;
3312   tree toffset;
3313   enum machine_mode mode;
3314   int unsignedp, volatilep;
3315
3316   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3317                               &unsignedp, &volatilep, false);
3318
3319   if (toffset != 0
3320       || bitpos % BITS_PER_UNIT != 0
3321       || TREE_CODE (core) != VAR_DECL)
3322     {
3323       *symbol_present = false;
3324       *var_present = true;
3325       fd_ivopts_data = data;
3326       walk_tree (&addr, find_depends, depends_on, NULL);
3327       return new_cost (target_spill_cost, 0);
3328     }
3329
3330   *offset += bitpos / BITS_PER_UNIT;
3331   if (TREE_STATIC (core)
3332       || DECL_EXTERNAL (core))
3333     {
3334       *symbol_present = true;
3335       *var_present = false;
3336       return zero_cost;
3337     }
3338
3339   *symbol_present = false;
3340   *var_present = true;
3341   return zero_cost;
3342 }
3343
3344 /* Estimates cost of expressing difference of addresses E1 - E2 as
3345    var + symbol + offset.  The value of offset is added to OFFSET,
3346    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3347    part is missing.  DEPENDS_ON is a set of the invariants the computation
3348    depends on.  */
3349
3350 static comp_cost
3351 ptr_difference_cost (struct ivopts_data *data,
3352                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3353                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3354 {
3355   HOST_WIDE_INT diff = 0;
3356   comp_cost cost;
3357
3358   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3359
3360   if (ptr_difference_const (e1, e2, &diff))
3361     {
3362       *offset += diff;
3363       *symbol_present = false;
3364       *var_present = false;
3365       return zero_cost;
3366     }
3367
3368   if (integer_zerop (e2))
3369     return split_address_cost (data, TREE_OPERAND (e1, 0),
3370                                symbol_present, var_present, offset, depends_on);
3371
3372   *symbol_present = false;
3373   *var_present = true;
3374
3375   cost = force_var_cost (data, e1, depends_on);
3376   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3377   cost.cost += add_cost (Pmode);
3378
3379   return cost;
3380 }
3381
3382 /* Estimates cost of expressing difference E1 - E2 as
3383    var + symbol + offset.  The value of offset is added to OFFSET,
3384    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3385    part is missing.  DEPENDS_ON is a set of the invariants the computation
3386    depends on.  */
3387
3388 static comp_cost
3389 difference_cost (struct ivopts_data *data,
3390                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3391                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3392 {
3393   comp_cost cost;
3394   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3395   unsigned HOST_WIDE_INT off1, off2;
3396
3397   e1 = strip_offset (e1, &off1);
3398   e2 = strip_offset (e2, &off2);
3399   *offset += off1 - off2;
3400
3401   STRIP_NOPS (e1);
3402   STRIP_NOPS (e2);
3403
3404   if (TREE_CODE (e1) == ADDR_EXPR)
3405     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3406                                 depends_on);
3407   *symbol_present = false;
3408
3409   if (operand_equal_p (e1, e2, 0))
3410     {
3411       *var_present = false;
3412       return zero_cost;
3413     }
3414   *var_present = true;
3415   if (integer_zerop (e2))
3416     return force_var_cost (data, e1, depends_on);
3417
3418   if (integer_zerop (e1))
3419     {
3420       cost = force_var_cost (data, e2, depends_on);
3421       cost.cost += multiply_by_cost (-1, mode);
3422
3423       return cost;
3424     }
3425
3426   cost = force_var_cost (data, e1, depends_on);
3427   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3428   cost.cost += add_cost (mode);
3429
3430   return cost;
3431 }
3432
3433 /* Determines the cost of the computation by that USE is expressed
3434    from induction variable CAND.  If ADDRESS_P is true, we just need
3435    to create an address from it, otherwise we want to get it into
3436    register.  A set of invariants we depend on is stored in
3437    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3438
3439 static comp_cost
3440 get_computation_cost_at (struct ivopts_data *data,
3441                          struct iv_use *use, struct iv_cand *cand,
3442                          bool address_p, bitmap *depends_on, tree at)
3443 {
3444   tree ubase = use->iv->base, ustep = use->iv->step;
3445   tree cbase, cstep;
3446   tree utype = TREE_TYPE (ubase), ctype;
3447   unsigned HOST_WIDE_INT cstepi, offset = 0;
3448   HOST_WIDE_INT ratio, aratio;
3449   bool var_present, symbol_present;
3450   comp_cost cost;
3451   unsigned n_sums;
3452   double_int rat;
3453
3454   *depends_on = NULL;
3455
3456   /* Only consider real candidates.  */
3457   if (!cand->iv)
3458     return infinite_cost;
3459
3460   cbase = cand->iv->base;
3461   cstep = cand->iv->step;
3462   ctype = TREE_TYPE (cbase);
3463
3464   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3465     {
3466       /* We do not have a precision to express the values of use.  */
3467       return infinite_cost;
3468     }
3469
3470   if (address_p)
3471     {
3472       /* Do not try to express address of an object with computation based
3473          on address of a different object.  This may cause problems in rtl
3474          level alias analysis (that does not expect this to be happening,
3475          as this is illegal in C), and would be unlikely to be useful
3476          anyway.  */
3477       if (use->iv->base_object
3478           && cand->iv->base_object
3479           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3480         return infinite_cost;
3481     }
3482
3483   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3484     {
3485       /* TODO -- add direct handling of this case.  */
3486       goto fallback;
3487     }
3488
3489   /* CSTEPI is removed from the offset in case statement is after the
3490      increment.  If the step is not constant, we use zero instead.
3491      This is a bit imprecise (there is the extra addition), but
3492      redundancy elimination is likely to transform the code so that
3493      it uses value of the variable before increment anyway,
3494      so it is not that much unrealistic.  */
3495   if (cst_and_fits_in_hwi (cstep))
3496     cstepi = int_cst_value (cstep);
3497   else
3498     cstepi = 0;
3499
3500   if (!constant_multiple_of (ustep, cstep, &rat))
3501     return infinite_cost;
3502
3503   if (double_int_fits_in_shwi_p (rat))
3504     ratio = double_int_to_shwi (rat);
3505   else
3506     return infinite_cost;
3507
3508   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3509      or ratio == 1, it is better to handle this like
3510
3511      ubase - ratio * cbase + ratio * var
3512
3513      (also holds in the case ratio == -1, TODO.  */
3514
3515   if (cst_and_fits_in_hwi (cbase))
3516     {
3517       offset = - ratio * int_cst_value (cbase);
3518       cost = difference_cost (data,
3519                               ubase, build_int_cst (utype, 0),
3520                               &symbol_present, &var_present, &offset,
3521                               depends_on);
3522     }
3523   else if (ratio == 1)
3524     {
3525       cost = difference_cost (data,
3526                               ubase, cbase,
3527                               &symbol_present, &var_present, &offset,
3528                               depends_on);
3529     }
3530   else
3531     {
3532       cost = force_var_cost (data, cbase, depends_on);
3533       cost.cost += add_cost (TYPE_MODE (ctype));
3534       cost = add_costs (cost,
3535                         difference_cost (data,
3536                                          ubase, build_int_cst (utype, 0),
3537                                          &symbol_present, &var_present,
3538                                          &offset, depends_on));
3539     }
3540
3541   /* If we are after the increment, the value of the candidate is higher by
3542      one iteration.  */
3543   if (stmt_after_increment (data->current_loop, cand, at))
3544     offset -= ratio * cstepi;
3545
3546   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3547      (symbol/var/const parts may be omitted).  If we are looking for an address,
3548      find the cost of addressing this.  */
3549   if (address_p)
3550     return add_costs (cost, get_address_cost (symbol_present, var_present,
3551                                 offset, ratio,
3552                                 TYPE_MODE (TREE_TYPE (*use->op_p))));
3553
3554   /* Otherwise estimate the costs for computing the expression.  */
3555   aratio = ratio > 0 ? ratio : -ratio;
3556   if (!symbol_present && !var_present && !offset)
3557     {
3558       if (ratio != 1)
3559         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3560
3561       return cost;
3562     }
3563
3564   if (aratio != 1)
3565     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3566
3567   n_sums = 1;
3568   if (var_present
3569       /* Symbol + offset should be compile-time computable.  */
3570       && (symbol_present || offset))
3571     n_sums++;
3572
3573   /* Having offset does not affect runtime cost in case it is added to
3574      symbol, but it increases complexity.  */
3575   if (offset)
3576     cost.complexity++;
3577
3578   cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3579   return cost;
3580
3581 fallback:
3582   {
3583     /* Just get the expression, expand it and measure the cost.  */
3584     tree comp = get_computation_at (data->current_loop, use, cand, at);
3585
3586     if (!comp)
3587       return infinite_cost;
3588
3589     if (address_p)
3590       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3591
3592     return new_cost (computation_cost (comp), 0);
3593   }
3594 }
3595
3596 /* Determines the cost of the computation by that USE is expressed
3597    from induction variable CAND.  If ADDRESS_P is true, we just need
3598    to create an address from it, otherwise we want to get it into
3599    register.  A set of invariants we depend on is stored in
3600    DEPENDS_ON.  */
3601
3602 static comp_cost
3603 get_computation_cost (struct ivopts_data *data,
3604                       struct iv_use *use, struct iv_cand *cand,
3605                       bool address_p, bitmap *depends_on)
3606 {
3607   return get_computation_cost_at (data,
3608                                   use, cand, address_p, depends_on, use->stmt);
3609 }
3610
3611 /* Determines cost of basing replacement of USE on CAND in a generic
3612    expression.  */
3613
3614 static bool
3615 determine_use_iv_cost_generic (struct ivopts_data *data,
3616                                struct iv_use *use, struct iv_cand *cand)
3617 {
3618   bitmap depends_on;
3619   comp_cost cost;
3620
3621   /* The simple case first -- if we need to express value of the preserved
3622      original biv, the cost is 0.  This also prevents us from counting the
3623      cost of increment twice -- once at this use and once in the cost of
3624      the candidate.  */
3625   if (cand->pos == IP_ORIGINAL
3626       && cand->incremented_at == use->stmt)
3627     {
3628       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3629       return true;
3630     }
3631
3632   cost = get_computation_cost (data, use, cand, false, &depends_on);
3633   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3634
3635   return !infinite_cost_p (cost);
3636 }
3637
3638 /* Determines cost of basing replacement of USE on CAND in an address.  */
3639
3640 static bool
3641 determine_use_iv_cost_address (struct ivopts_data *data,
3642                                struct iv_use *use, struct iv_cand *cand)
3643 {
3644   bitmap depends_on;
3645   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3646
3647   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3648
3649   return !infinite_cost_p (cost);
3650 }
3651
3652 /* Computes value of candidate CAND at position AT in iteration NITER, and
3653    stores it to VAL.  */
3654
3655 static void
3656 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3657                aff_tree *val)
3658 {
3659   aff_tree step, delta, nit;
3660   struct iv *iv = cand->iv;
3661   tree type = TREE_TYPE (iv->base);
3662
3663   tree_to_aff_combination (iv->step, type, &step);
3664   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3665   aff_combination_convert (&nit, type);
3666   aff_combination_mult (&nit, &step, &delta);
3667   if (stmt_after_increment (loop, cand, at))
3668     aff_combination_add (&delta, &step);
3669
3670   tree_to_aff_combination (iv->base, type, val);
3671   aff_combination_add (val, &delta);
3672 }
3673
3674 /* Returns period of induction variable iv.  */
3675
3676 static tree
3677 iv_period (struct iv *iv)
3678 {
3679   tree step = iv->step, period, type;
3680   tree pow2div;
3681
3682   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3683
3684   /* Period of the iv is gcd (step, type range).  Since type range is power
3685      of two, it suffices to determine the maximum power of two that divides
3686      step.  */
3687   pow2div = num_ending_zeros (step);
3688   type = unsigned_type_for (TREE_TYPE (step));
3689
3690   period = build_low_bits_mask (type,
3691                                 (TYPE_PRECISION (type)
3692                                  - tree_low_cst (pow2div, 1)));
3693
3694   return period;
3695 }
3696
3697 /* Returns the comparison operator used when eliminating the iv USE.  */
3698
3699 static enum tree_code
3700 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3701 {
3702   struct loop *loop = data->current_loop;
3703   basic_block ex_bb;
3704   edge exit;
3705
3706   ex_bb = bb_for_stmt (use->stmt);
3707   exit = EDGE_SUCC (ex_bb, 0);
3708   if (flow_bb_inside_loop_p (loop, exit->dest))
3709     exit = EDGE_SUCC (ex_bb, 1);
3710
3711   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3712 }
3713
3714 /* Check whether it is possible to express the condition in USE by comparison
3715    of candidate CAND.  If so, store the value compared with to BOUND.  */
3716
3717 static bool
3718 may_eliminate_iv (struct ivopts_data *data,
3719                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3720 {
3721   basic_block ex_bb;
3722   edge exit;
3723   tree nit, period;
3724   struct loop *loop = data->current_loop;
3725   aff_tree bnd;
3726   double_int period_value, max_niter;
3727
3728   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3729     return false;
3730
3731   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3732      for other conditions inside loop body.  */
3733   ex_bb = bb_for_stmt (use->stmt);
3734   if (use->stmt != last_stmt (ex_bb)
3735       || TREE_CODE (use->stmt) != COND_EXPR)
3736     return false;
3737   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3738     return false;
3739
3740   exit = EDGE_SUCC (ex_bb, 0);
3741   if (flow_bb_inside_loop_p (loop, exit->dest))
3742     exit = EDGE_SUCC (ex_bb, 1);
3743   if (flow_bb_inside_loop_p (loop, exit->dest))
3744     return false;
3745
3746   nit = niter_for_exit (data, exit);
3747   if (!nit)
3748     return false;
3749
3750   /* Determine whether we may use the variable to test whether niter iterations
3751      elapsed.  This is the case iff the period of the induction variable is
3752      greater than the number of iterations.  */
3753   period = iv_period (cand->iv);
3754   if (!period)
3755     return false;
3756
3757   /* Compare the period with the estimate on the number of iterations of the
3758      loop.  */
3759   if (!estimated_loop_iterations (loop, true, &max_niter))
3760     return false;
3761   period_value = tree_to_double_int (period);
3762   if (double_int_ucmp (period_value, max_niter) <= 0)
3763     return false;
3764
3765   cand_value_at (loop, cand, use->stmt, nit, &bnd);
3766   *bound = aff_combination_to_tree (&bnd);
3767   return true;
3768 }
3769
3770 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3771
3772 static bool
3773 determine_use_iv_cost_condition (struct ivopts_data *data,
3774                                  struct iv_use *use, struct iv_cand *cand)
3775 {
3776   tree bound = NULL_TREE;
3777   struct iv *cmp_iv;
3778   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3779   comp_cost elim_cost, express_cost, cost;
3780   bool ok;
3781
3782   /* Only consider real candidates.  */
3783   if (!cand->iv)
3784     {
3785       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3786       return false;
3787     }
3788
3789   /* Try iv elimination.  */
3790   if (may_eliminate_iv (data, use, cand, &bound))
3791     {
3792       elim_cost = force_var_cost (data, bound, &depends_on_elim);
3793       /* The bound is a loop invariant, so it will be only computed
3794          once.  */
3795       elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3796     }
3797   else
3798     elim_cost = infinite_cost;
3799
3800   /* Try expressing the original giv.  If it is compared with an invariant,
3801      note that we cannot get rid of it.  */
3802   ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3803   gcc_assert (ok);
3804
3805   express_cost = get_computation_cost (data, use, cand, false,
3806                                        &depends_on_express);
3807   fd_ivopts_data = data;
3808   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3809
3810   /* Choose the better approach.  */
3811   if (compare_costs (elim_cost, express_cost) < 0)
3812     {
3813       cost = elim_cost;
3814       depends_on = depends_on_elim;
3815       depends_on_elim = NULL;
3816     }
3817   else
3818     {
3819       cost = express_cost;
3820       depends_on = depends_on_express;
3821       depends_on_express = NULL;
3822       bound = NULL_TREE;
3823     }
3824
3825   set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3826
3827   if (depends_on_elim)
3828     BITMAP_FREE (depends_on_elim);
3829   if (depends_on_express)
3830     BITMAP_FREE (depends_on_express);
3831
3832   return !infinite_cost_p (cost);
3833 }
3834
3835 /* Determines cost of basing replacement of USE on CAND.  Returns false
3836    if USE cannot be based on CAND.  */
3837
3838 static bool
3839 determine_use_iv_cost (struct ivopts_data *data,
3840                        struct iv_use *use, struct iv_cand *cand)
3841 {
3842   switch (use->type)
3843     {
3844     case USE_NONLINEAR_EXPR:
3845       return determine_use_iv_cost_generic (data, use, cand);
3846
3847     case USE_ADDRESS:
3848       return determine_use_iv_cost_address (data, use, cand);
3849
3850     case USE_COMPARE:
3851       return determine_use_iv_cost_condition (data, use, cand);
3852
3853     default:
3854       gcc_unreachable ();
3855     }
3856 }
3857
3858 /* Determines costs of basing the use of the iv on an iv candidate.  */
3859
3860 static void
3861 determine_use_iv_costs (struct ivopts_data *data)
3862 {
3863   unsigned i, j;
3864   struct iv_use *use;
3865   struct iv_cand *cand;
3866   bitmap to_clear = BITMAP_ALLOC (NULL);
3867
3868   alloc_use_cost_map (data);
3869
3870   for (i = 0; i < n_iv_uses (data); i++)
3871     {
3872       use = iv_use (data, i);
3873
3874       if (data->consider_all_candidates)
3875         {
3876           for (j = 0; j < n_iv_cands (data); j++)
3877             {
3878               cand = iv_cand (data, j);
3879               determine_use_iv_cost (data, use, cand);
3880             }
3881         }
3882       else
3883         {
3884           bitmap_iterator bi;
3885
3886           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3887             {
3888               cand = iv_cand (data, j);
3889               if (!determine_use_iv_cost (data, use, cand))
3890                 bitmap_set_bit (to_clear, j);
3891             }
3892
3893           /* Remove the candidates for that the cost is infinite from
3894              the list of related candidates.  */
3895           bitmap_and_compl_into (use->related_cands, to_clear);
3896           bitmap_clear (to_clear);
3897         }
3898     }
3899
3900   BITMAP_FREE (to_clear);
3901
3902   if (dump_file && (dump_flags & TDF_DETAILS))
3903     {
3904       fprintf (dump_file, "Use-candidate costs:\n");
3905
3906       for (i = 0; i < n_iv_uses (data); i++)
3907         {
3908           use = iv_use (data, i);
3909
3910           fprintf (dump_file, "Use %d:\n", i);
3911           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
3912           for (j = 0; j < use->n_map_members; j++)
3913             {
3914               if (!use->cost_map[j].cand
3915                   || infinite_cost_p (use->cost_map[j].cost))
3916                 continue;
3917
3918               fprintf (dump_file, "  %d\t%d\t%d\t",
3919                        use->cost_map[j].cand->id,
3920                        use->cost_map[j].cost.cost,
3921                        use->cost_map[j].cost.complexity);
3922               if (use->cost_map[j].depends_on)
3923                 bitmap_print (dump_file,
3924                               use->cost_map[j].depends_on, "","");
3925               fprintf (dump_file, "\n");
3926             }
3927
3928           fprintf (dump_file, "\n");
3929         }
3930       fprintf (dump_file, "\n");
3931     }
3932 }
3933
3934 /* Determines cost of the candidate CAND.  */
3935
3936 static void
3937 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3938 {
3939   comp_cost cost_base;
3940   unsigned cost, cost_step;
3941   tree base;
3942
3943   if (!cand->iv)
3944     {
3945       cand->cost = 0;
3946       return;
3947     }
3948
3949   /* There are two costs associated with the candidate -- its increment
3950      and its initialization.  The second is almost negligible for any loop
3951      that rolls enough, so we take it just very little into account.  */
3952
3953   base = cand->iv->base;
3954   cost_base = force_var_cost (data, base, NULL);
3955   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3956
3957   cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
3958
3959   /* Prefer the original ivs unless we may gain something by replacing it.
3960      The reason is to makee debugging simpler; so this is not relevant for
3961      artificial ivs created by other optimization passes.  */
3962   if (cand->pos != IP_ORIGINAL
3963       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3964     cost++;
3965
3966   /* Prefer not to insert statements into latch unless there are some
3967      already (so that we do not create unnecessary jumps).  */
3968   if (cand->pos == IP_END
3969       && empty_block_p (ip_end_pos (data->current_loop)))
3970     cost++;
3971
3972   cand->cost = cost;
3973 }
3974
3975 /* Determines costs of computation of the candidates.  */
3976
3977 static void
3978 determine_iv_costs (struct ivopts_data *data)
3979 {
3980   unsigned i;
3981
3982   if (dump_file && (dump_flags & TDF_DETAILS))
3983     {
3984       fprintf (dump_file, "Candidate costs:\n");
3985       fprintf (dump_file, "  cand\tcost\n");
3986     }
3987
3988   for (i = 0; i < n_iv_cands (data); i++)
3989     {
3990       struct iv_cand *cand = iv_cand (data, i);
3991
3992       determine_iv_cost (data, cand);
3993
3994       if (dump_file && (dump_flags & TDF_DETAILS))
3995         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
3996     }
3997
3998   if (dump_file && (dump_flags & TDF_DETAILS))
3999     fprintf (dump_file, "\n");
4000 }
4001
4002 /* Calculates cost for having SIZE induction variables.  */
4003
4004 static unsigned
4005 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4006 {
4007   /* We add size to the cost, so that we prefer eliminating ivs
4008      if possible.  */
4009   return size + estimate_reg_pressure_cost (size, data->regs_used);
4010 }
4011
4012 /* For each size of the induction variable set determine the penalty.  */
4013
4014 static void
4015 determine_set_costs (struct ivopts_data *data)
4016 {
4017   unsigned j, n;
4018   tree phi, op;
4019   struct loop *loop = data->current_loop;
4020   bitmap_iterator bi;
4021
4022   /* We use the following model (definitely improvable, especially the
4023      cost function -- TODO):
4024
4025      We estimate the number of registers available (using MD data), name it A.
4026
4027      We estimate the number of registers used by the loop, name it U.  This
4028      number is obtained as the number of loop phi nodes (not counting virtual
4029      registers and bivs) + the number of variables from outside of the loop.
4030
4031      We set a reserve R (free regs that are used for temporary computations,
4032      etc.).  For now the reserve is a constant 3.
4033
4034      Let I be the number of induction variables.
4035
4036      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4037         make a lot of ivs without a reason).
4038      -- if A - R < U + I <= A, the cost is I * PRES_COST
4039      -- if U + I > A, the cost is I * PRES_COST and
4040         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4041
4042   if (dump_file && (dump_flags & TDF_DETAILS))
4043     {
4044       fprintf (dump_file, "Global costs:\n");
4045       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4046       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost);
4047       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4048     }
4049
4050   n = 0;
4051   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4052     {
4053       op = PHI_RESULT (phi);
4054
4055       if (!is_gimple_reg (op))
4056         continue;
4057
4058       if (get_iv (data, op))
4059         continue;
4060
4061       n++;
4062     }
4063
4064   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4065     {
4066       struct version_info *info = ver_info (data, j);
4067
4068       if (info->inv_id && info->has_nonlin_use)
4069         n++;
4070     }
4071
4072   data->regs_used = n;
4073   if (dump_file && (dump_flags & TDF_DETAILS))
4074     fprintf (dump_file, "  regs_used %d\n", n);
4075
4076   if (dump_file && (dump_flags & TDF_DETAILS))
4077     {
4078       fprintf (dump_file, "  cost for size:\n");
4079       fprintf (dump_file, "  ivs\tcost\n");
4080       for (j = 0; j <= 2 * target_avail_regs; j++)
4081         fprintf (dump_file, "  %d\t%d\n", j,
4082                  ivopts_global_cost_for_size (data, j));
4083       fprintf (dump_file, "\n");
4084     }
4085 }
4086
4087 /* Returns true if A is a cheaper cost pair than B.  */
4088
4089 static bool
4090 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4091 {
4092   int cmp;
4093
4094   if (!a)
4095     return false;
4096
4097   if (!b)
4098     return true;
4099
4100   cmp = compare_costs (a->cost, b->cost);
4101   if (cmp < 0)
4102     return true;
4103
4104   if (cmp > 0)
4105     return false;
4106
4107   /* In case the costs are the same, prefer the cheaper candidate.  */
4108   if (a->cand->cost < b->cand->cost)
4109     return true;
4110
4111   return false;
4112 }
4113
4114 /* Computes the cost field of IVS structure.  */
4115
4116 static void
4117 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4118 {
4119   comp_cost cost = ivs->cand_use_cost;
4120   cost.cost += ivs->cand_cost;
4121   cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4122
4123   ivs->cost = cost;
4124 }
4125
4126 /* Remove invariants in set INVS to set IVS.  */
4127
4128 static void
4129 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4130 {
4131   bitmap_iterator bi;
4132   unsigned iid;
4133
4134   if (!invs)
4135     return;
4136
4137   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4138     {
4139       ivs->n_invariant_uses[iid]--;
4140       if (ivs->n_invariant_uses[iid] == 0)
4141         ivs->n_regs--;
4142     }
4143 }
4144
4145 /* Set USE not to be expressed by any candidate in IVS.  */
4146
4147 static void
4148 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4149                  struct iv_use *use)
4150 {
4151   unsigned uid = use->id, cid;
4152   struct cost_pair *cp;
4153
4154   cp = ivs->cand_for_use[uid];
4155   if (!cp)
4156     return;
4157   cid = cp->cand->id;
4158
4159   ivs->bad_uses++;
4160   ivs->cand_for_use[uid] = NULL;
4161   ivs->n_cand_uses[cid]--;
4162
4163   if (ivs->n_cand_uses[cid] == 0)
4164     {
4165       bitmap_clear_bit (ivs->cands, cid);
4166       /* Do not count the pseudocandidates.  */
4167       if (cp->cand->iv)
4168         ivs->n_regs--;
4169       ivs->n_cands--;
4170       ivs->cand_cost -= cp->cand->cost;
4171
4172       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4173     }
4174
4175   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4176
4177   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4178   iv_ca_recount_cost (data, ivs);
4179 }
4180
4181 /* Add invariants in set INVS to set IVS.  */
4182
4183 static void
4184 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4185 {
4186   bitmap_iterator bi;
4187   unsigned iid;
4188
4189   if (!invs)
4190     return;
4191
4192   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4193     {
4194       ivs->n_invariant_uses[iid]++;
4195       if (ivs->n_invariant_uses[iid] == 1)
4196         ivs->n_regs++;
4197     }
4198 }
4199
4200 /* Set cost pair for USE in set IVS to CP.  */
4201
4202 static void
4203 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4204               struct iv_use *use, struct cost_pair *cp)
4205 {
4206   unsigned uid = use->id, cid;
4207
4208   if (ivs->cand_for_use[uid] == cp)
4209     return;
4210
4211   if (ivs->cand_for_use[uid])
4212     iv_ca_set_no_cp (data, ivs, use);
4213
4214   if (cp)
4215     {
4216       cid = cp->cand->id;
4217
4218       ivs->bad_uses--;
4219       ivs->cand_for_use[uid] = cp;
4220       ivs->n_cand_uses[cid]++;
4221       if (ivs->n_cand_uses[cid] == 1)
4222         {
4223           bitmap_set_bit (ivs->cands, cid);
4224           /* Do not count the pseudocandidates.  */
4225           if (cp->cand->iv)
4226             ivs->n_regs++;
4227           ivs->n_cands++;
4228           ivs->cand_cost += cp->cand->cost;
4229
4230           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4231         }
4232
4233       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4234       iv_ca_set_add_invariants (ivs, cp->depends_on);
4235       iv_ca_recount_cost (data, ivs);
4236     }
4237 }
4238
4239 /* Extend set IVS by expressing USE by some of the candidates in it
4240    if possible.  */
4241
4242 static void
4243 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4244                struct iv_use *use)
4245 {
4246   struct cost_pair *best_cp = NULL, *cp;
4247   bitmap_iterator bi;
4248   unsigned i;
4249
4250   gcc_assert (ivs->upto >= use->id);
4251
4252   if (ivs->upto == use->id)
4253     {
4254       ivs->upto++;
4255       ivs->bad_uses++;
4256     }
4257
4258   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4259     {
4260       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4261
4262       if (cheaper_cost_pair (cp, best_cp))
4263         best_cp = cp;
4264     }
4265
4266   iv_ca_set_cp (data, ivs, use, best_cp);
4267 }
4268
4269 /* Get cost for assignment IVS.  */
4270
4271 static comp_cost
4272 iv_ca_cost (struct iv_ca *ivs)
4273 {
4274   return (ivs->bad_uses ? infinite_cost : ivs->cost);
4275 }
4276
4277 /* Returns true if all dependences of CP are among invariants in IVS.  */
4278
4279 static bool
4280 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4281 {
4282   unsigned i;
4283   bitmap_iterator bi;
4284
4285   if (!cp->depends_on)
4286     return true;
4287
4288   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4289     {
4290       if (ivs->n_invariant_uses[i] == 0)
4291         return false;
4292     }
4293
4294   return true;
4295 }
4296
4297 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4298    it before NEXT_CHANGE.  */
4299
4300 static struct iv_ca_delta *
4301 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4302                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4303 {
4304   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4305
4306   change->use = use;
4307   change->old_cp = old_cp;
4308   change->new_cp = new_cp;
4309   change->next_change = next_change;
4310
4311   return change;
4312 }
4313
4314 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4315    are rewritten.  */
4316
4317 static struct iv_ca_delta *
4318 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4319 {
4320   struct iv_ca_delta *last;
4321
4322   if (!l2)
4323     return l1;
4324
4325   if (!l1)
4326     return l2;
4327
4328   for (last = l1; last->next_change; last = last->next_change)
4329     continue;
4330   last->next_change = l2;
4331
4332   return l1;
4333 }
4334
4335 /* Returns candidate by that USE is expressed in IVS.  */
4336
4337 static struct cost_pair *
4338 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4339 {
4340   return ivs->cand_for_use[use->id];
4341 }
4342
4343 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4344
4345 static struct iv_ca_delta *
4346 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4347 {
4348   struct iv_ca_delta *act, *next, *prev = NULL;
4349   struct cost_pair *tmp;
4350
4351   for (act = delta; act; act = next)
4352     {
4353       next = act->next_change;
4354       act->next_change = prev;
4355       prev = act;
4356
4357       tmp = act->old_cp;
4358       act->old_cp = act->new_cp;
4359       act->new_cp = tmp;
4360     }
4361
4362   return prev;
4363 }
4364
4365 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4366    reverted instead.  */
4367
4368 static void
4369 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4370                     struct iv_ca_delta *delta, bool forward)
4371 {
4372   struct cost_pair *from, *to;
4373   struct iv_ca_delta *act;
4374
4375   if (!forward)
4376     delta = iv_ca_delta_reverse (delta);
4377
4378   for (act = delta; act; act = act->next_change)
4379     {
4380       from = act->old_cp;
4381       to = act->new_cp;
4382       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4383       iv_ca_set_cp (data, ivs, act->use, to);
4384     }
4385
4386   if (!forward)
4387     iv_ca_delta_reverse (delta);
4388 }
4389
4390 /* Returns true if CAND is used in IVS.  */
4391
4392 static bool
4393 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4394 {
4395   return ivs->n_cand_uses[cand->id] > 0;
4396 }
4397
4398 /* Returns number of induction variable candidates in the set IVS.  */
4399
4400 static unsigned
4401 iv_ca_n_cands (struct iv_ca *ivs)
4402 {
4403   return ivs->n_cands;
4404 }
4405
4406 /* Free the list of changes DELTA.  */
4407
4408 static void
4409 iv_ca_delta_free (struct iv_ca_delta **delta)
4410 {
4411   struct iv_ca_delta *act, *next;
4412
4413   for (act = *delta; act; act = next)
4414     {
4415       next = act->next_change;
4416       free (act);
4417     }
4418
4419   *delta = NULL;
4420 }
4421
4422 /* Allocates new iv candidates assignment.  */
4423
4424 static struct iv_ca *
4425 iv_ca_new (struct ivopts_data *data)
4426 {
4427   struct iv_ca *nw = XNEW (struct iv_ca);
4428
4429   nw->upto = 0;
4430   nw->bad_uses = 0;
4431   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4432   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4433   nw->cands = BITMAP_ALLOC (NULL);
4434   nw->n_cands = 0;
4435   nw->n_regs = 0;
4436   nw->cand_use_cost = zero_cost;
4437   nw->cand_cost = 0;
4438   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4439   nw->cost = zero_cost;
4440
4441   return nw;
4442 }
4443
4444 /* Free memory occupied by the set IVS.  */
4445
4446 static void
4447 iv_ca_free (struct iv_ca **ivs)
4448 {
4449   free ((*ivs)->cand_for_use);
4450   free ((*ivs)->n_cand_uses);
4451   BITMAP_FREE ((*ivs)->cands);
4452   free ((*ivs)->n_invariant_uses);
4453   free (*ivs);
4454   *ivs = NULL;
4455 }
4456
4457 /* Dumps IVS to FILE.  */
4458
4459 static void
4460 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4461 {
4462   const char *pref = "  invariants ";
4463   unsigned i;
4464   comp_cost cost = iv_ca_cost (ivs);
4465
4466   fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
4467   bitmap_print (file, ivs->cands, "  candidates ","\n");
4468
4469   for (i = 1; i <= data->max_inv_id; i++)
4470     if (ivs->n_invariant_uses[i])
4471       {
4472         fprintf (file, "%s%d", pref, i);
4473         pref = ", ";
4474       }
4475   fprintf (file, "\n");
4476 }
4477
4478 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4479    new set, and store differences in DELTA.  Number of induction variables
4480    in the new set is stored to N_IVS.  */
4481
4482 static comp_cost
4483 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4484               struct iv_cand *cand, struct iv_ca_delta **delta,
4485               unsigned *n_ivs)
4486 {
4487   unsigned i;
4488   comp_cost cost;
4489   struct iv_use *use;
4490   struct cost_pair *old_cp, *new_cp;
4491
4492   *delta = NULL;
4493   for (i = 0; i < ivs->upto; i++)
4494     {
4495       use = iv_use (data, i);
4496       old_cp = iv_ca_cand_for_use (ivs, use);
4497
4498       if (old_cp
4499           && old_cp->cand == cand)
4500         continue;
4501
4502       new_cp = get_use_iv_cost (data, use, cand);
4503       if (!new_cp)
4504         continue;
4505
4506       if (!iv_ca_has_deps (ivs, new_cp))
4507         continue;
4508
4509       if (!cheaper_cost_pair (new_cp, old_cp))
4510         continue;
4511
4512       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4513     }
4514
4515   iv_ca_delta_commit (data, ivs, *delta, true);
4516   cost = iv_ca_cost (ivs);
4517   if (n_ivs)
4518     *n_ivs = iv_ca_n_cands (ivs);
4519   iv_ca_delta_commit (data, ivs, *delta, false);
4520
4521   return cost;
4522 }
4523
4524 /* Try narrowing set IVS by removing CAND.  Return the cost of
4525    the new set and store the differences in DELTA.  */
4526
4527 static comp_cost
4528 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4529               struct iv_cand *cand, struct iv_ca_delta **delta)
4530 {
4531   unsigned i, ci;
4532   struct iv_use *use;
4533   struct cost_pair *old_cp, *new_cp, *cp;
4534   bitmap_iterator bi;
4535   struct iv_cand *cnd;
4536   comp_cost cost;
4537
4538   *delta = NULL;
4539   for (i = 0; i < n_iv_uses (data); i++)
4540     {
4541       use = iv_use (data, i);
4542
4543       old_cp = iv_ca_cand_for_use (ivs, use);
4544       if (old_cp->cand != cand)
4545         continue;
4546
4547       new_cp = NULL;
4548
4549       if (data->consider_all_candidates)
4550         {
4551           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4552             {
4553               if (ci == cand->id)
4554                 continue;
4555
4556               cnd = iv_cand (data, ci);
4557
4558               cp = get_use_iv_cost (data, use, cnd);
4559               if (!cp)
4560                 continue;
4561               if (!iv_ca_has_deps (ivs, cp))
4562                 continue;
4563
4564               if (!cheaper_cost_pair (cp, new_cp))
4565                 continue;
4566
4567               new_cp = cp;
4568             }
4569         }
4570       else
4571         {
4572           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4573             {
4574               if (ci == cand->id)
4575                 continue;
4576
4577               cnd = iv_cand (data, ci);
4578
4579               cp = get_use_iv_cost (data, use, cnd);
4580               if (!cp)
4581                 continue;
4582               if (!iv_ca_has_deps (ivs, cp))
4583                 continue;
4584
4585               if (!cheaper_cost_pair (cp, new_cp))
4586                 continue;
4587
4588               new_cp = cp;
4589             }
4590         }
4591
4592       if (!new_cp)
4593         {
4594           iv_ca_delta_free (delta);
4595           return infinite_cost;
4596         }
4597
4598       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4599     }
4600
4601   iv_ca_delta_commit (data, ivs, *delta, true);
4602   cost = iv_ca_cost (ivs);
4603   iv_ca_delta_commit (data, ivs, *delta, false);
4604
4605   return cost;
4606 }
4607
4608 /* Try optimizing the set of candidates IVS by removing candidates different
4609    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4610    differences in DELTA.  */
4611
4612 static comp_cost
4613 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4614              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4615 {
4616   bitmap_iterator bi;
4617   struct iv_ca_delta *act_delta, *best_delta;
4618   unsigned i;
4619   comp_cost best_cost, acost;
4620   struct iv_cand *cand;
4621
4622   best_delta = NULL;
4623   best_cost = iv_ca_cost (ivs);
4624
4625   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4626     {
4627       cand = iv_cand (data, i);
4628
4629       if (cand == except_cand)
4630         continue;
4631
4632       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4633
4634       if (compare_costs (acost, best_cost) < 0)
4635         {
4636           best_cost = acost;
4637           iv_ca_delta_free (&best_delta);
4638           best_delta = act_delta;
4639         }
4640       else
4641         iv_ca_delta_free (&act_delta);
4642     }
4643
4644   if (!best_delta)
4645     {
4646       *delta = NULL;
4647       return best_cost;
4648     }
4649
4650   /* Recurse to possibly remove other unnecessary ivs.  */
4651   iv_ca_delta_commit (data, ivs, best_delta, true);
4652   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4653   iv_ca_delta_commit (data, ivs, best_delta, false);
4654   *delta = iv_ca_delta_join (best_delta, *delta);
4655   return best_cost;
4656 }
4657
4658 /* Tries to extend the sets IVS in the best possible way in order
4659    to express the USE.  */
4660
4661 static bool
4662 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4663                   struct iv_use *use)
4664 {
4665   comp_cost best_cost, act_cost;
4666   unsigned i;
4667   bitmap_iterator bi;
4668   struct iv_cand *cand;
4669   struct iv_ca_delta *best_delta = NULL, *act_delta;
4670   struct cost_pair *cp;
4671
4672   iv_ca_add_use (data, ivs, use);
4673   best_cost = iv_ca_cost (ivs);
4674
4675   cp = iv_ca_cand_for_use (ivs, use);
4676   if (cp)
4677     {
4678       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4679       iv_ca_set_no_cp (data, ivs, use);
4680     }
4681
4682   /* First try important candidates not based on any memory object.  Only if
4683      this fails, try the specific ones.  Rationale -- in loops with many
4684      variables the best choice often is to use just one generic biv.  If we
4685      added here many ivs specific to the uses, the optimization algorithm later
4686      would be likely to get stuck in a local minimum, thus causing us to create
4687      too many ivs.  The approach from few ivs to more seems more likely to be
4688      successful -- starting from few ivs, replacing an expensive use by a
4689      specific iv should always be a win.  */
4690   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4691     {
4692       cand = iv_cand (data, i);
4693
4694       if (cand->iv->base_object != NULL_TREE)
4695         continue;
4696
4697       if (iv_ca_cand_used_p (ivs, cand))
4698         continue;
4699
4700       cp = get_use_iv_cost (data, use, cand);
4701       if (!cp)
4702         continue;
4703
4704       iv_ca_set_cp (data, ivs, use, cp);
4705       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4706       iv_ca_set_no_cp (data, ivs, use);
4707       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4708
4709       if (compare_costs (act_cost, best_cost) < 0)
4710         {
4711           best_cost = act_cost;
4712
4713           iv_ca_delta_free (&best_delta);
4714           best_delta = act_delta;
4715         }
4716       else
4717         iv_ca_delta_free (&act_delta);
4718     }
4719
4720   if (infinite_cost_p (best_cost))
4721     {
4722       for (i = 0; i < use->n_map_members; i++)
4723         {
4724           cp = use->cost_map + i;
4725           cand = cp->cand;
4726           if (!cand)
4727             continue;
4728
4729           /* Already tried this.  */
4730           if (cand->important && cand->iv->base_object == NULL_TREE)
4731             continue;
4732
4733           if (iv_ca_cand_used_p (ivs, cand))
4734             continue;
4735
4736           act_delta = NULL;
4737           iv_ca_set_cp (data, ivs, use, cp);
4738           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4739           iv_ca_set_no_cp (data, ivs, use);
4740           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4741                                        cp, act_delta);
4742
4743           if (compare_costs (act_cost, best_cost) < 0)
4744             {
4745               best_cost = act_cost;
4746
4747               if (best_delta)
4748                 iv_ca_delta_free (&best_delta);
4749               best_delta = act_delta;
4750             }
4751           else
4752             iv_ca_delta_free (&act_delta);
4753         }
4754     }
4755
4756   iv_ca_delta_commit (data, ivs, best_delta, true);
4757   iv_ca_delta_free (&best_delta);
4758
4759   return !infinite_cost_p (best_cost);
4760 }
4761
4762 /* Finds an initial assignment of candidates to uses.  */
4763
4764 static struct iv_ca *
4765 get_initial_solution (struct ivopts_data *data)
4766 {
4767   struct iv_ca *ivs = iv_ca_new (data);
4768   unsigned i;
4769
4770   for (i = 0; i < n_iv_uses (data); i++)
4771     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4772       {
4773         iv_ca_free (&ivs);
4774         return NULL;
4775       }
4776
4777   return ivs;
4778 }
4779
4780 /* Tries to improve set of induction variables IVS.  */
4781
4782 static bool
4783 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4784 {
4785   unsigned i, n_ivs;
4786   comp_cost acost, best_cost = iv_ca_cost (ivs);
4787   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4788   struct iv_cand *cand;
4789
4790   /* Try extending the set of induction variables by one.  */
4791   for (i = 0; i < n_iv_cands (data); i++)
4792     {
4793       cand = iv_cand (data, i);
4794
4795       if (iv_ca_cand_used_p (ivs, cand))
4796         continue;
4797
4798       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4799       if (!act_delta)
4800         continue;
4801
4802       /* If we successfully added the candidate and the set is small enough,
4803          try optimizing it by removing other candidates.  */
4804       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4805         {
4806           iv_ca_delta_commit (data, ivs, act_delta, true);
4807           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4808           iv_ca_delta_commit (data, ivs, act_delta, false);
4809           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4810         }
4811
4812       if (compare_costs (acost, best_cost) < 0)
4813         {
4814           best_cost = acost;
4815           iv_ca_delta_free (&best_delta);
4816           best_delta = act_delta;
4817         }
4818       else
4819         iv_ca_delta_free (&act_delta);
4820     }
4821
4822   if (!best_delta)
4823     {
4824       /* Try removing the candidates from the set instead.  */
4825       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4826
4827       /* Nothing more we can do.  */
4828       if (!best_delta)
4829         return false;
4830     }
4831
4832   iv_ca_delta_commit (data, ivs, best_delta, true);
4833   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4834   iv_ca_delta_free (&best_delta);
4835   return true;
4836 }
4837
4838 /* Attempts to find the optimal set of induction variables.  We do simple
4839    greedy heuristic -- we try to replace at most one candidate in the selected
4840    solution and remove the unused ivs while this improves the cost.  */
4841
4842 static struct iv_ca *
4843 find_optimal_iv_set (struct ivopts_data *data)
4844 {
4845   unsigned i;
4846   struct iv_ca *set;
4847   struct iv_use *use;
4848
4849   /* Get the initial solution.  */
4850   set = get_initial_solution (data);
4851   if (!set)
4852     {
4853       if (dump_file && (dump_flags & TDF_DETAILS))
4854         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4855       return NULL;
4856     }
4857
4858   if (dump_file && (dump_flags & TDF_DETAILS))
4859     {
4860       fprintf (dump_file, "Initial set of candidates:\n");
4861       iv_ca_dump (data, dump_file, set);
4862     }
4863
4864   while (try_improve_iv_set (data, set))
4865     {
4866       if (dump_file && (dump_flags & TDF_DETAILS))
4867         {
4868           fprintf (dump_file, "Improved to:\n");
4869           iv_ca_dump (data, dump_file, set);
4870         }
4871     }
4872
4873   if (dump_file && (dump_flags & TDF_DETAILS))
4874     {
4875       comp_cost cost = iv_ca_cost (set);
4876       fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4877     }
4878
4879   for (i = 0; i < n_iv_uses (data); i++)
4880     {
4881       use = iv_use (data, i);
4882       use->selected = iv_ca_cand_for_use (set, use)->cand;
4883     }
4884
4885   return set;
4886 }
4887
4888 /* Creates a new induction variable corresponding to CAND.  */
4889
4890 static void
4891 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4892 {
4893   block_stmt_iterator incr_pos;
4894   tree base;
4895   bool after = false;
4896
4897   if (!cand->iv)
4898     return;
4899
4900   switch (cand->pos)
4901     {
4902     case IP_NORMAL:
4903       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4904       break;
4905
4906     case IP_END:
4907       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4908       after = true;
4909       break;
4910
4911     case IP_ORIGINAL:
4912       /* Mark that the iv is preserved.  */
4913       name_info (data, cand->var_before)->preserve_biv = true;
4914       name_info (data, cand->var_after)->preserve_biv = true;
4915
4916       /* Rewrite the increment so that it uses var_before directly.  */
4917       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4918
4919       return;
4920     }
4921
4922   gimple_add_tmp_var (cand->var_before);
4923   add_referenced_var (cand->var_before);
4924
4925   base = unshare_expr (cand->iv->base);
4926
4927   create_iv (base, unshare_expr (cand->iv->step),
4928              cand->var_before, data->current_loop,
4929              &incr_pos, after, &cand->var_before, &cand->var_after);
4930 }
4931
4932 /* Creates new induction variables described in SET.  */
4933
4934 static void
4935 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4936 {
4937   unsigned i;
4938   struct iv_cand *cand;
4939   bitmap_iterator bi;
4940
4941   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4942     {
4943       cand = iv_cand (data, i);
4944       create_new_iv (data, cand);
4945     }
4946 }
4947
4948 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4949    is true, remove also the ssa name defined by the statement.  */
4950
4951 static void
4952 remove_statement (tree stmt, bool including_defined_name)
4953 {
4954   if (TREE_CODE (stmt) == PHI_NODE)
4955     {
4956       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4957     }
4958   else
4959     {
4960       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4961
4962       bsi_remove (&bsi, true);
4963       release_defs (stmt);
4964     }
4965 }
4966
4967 /* Rewrites USE (definition of iv used in a nonlinear expression)
4968    using candidate CAND.  */
4969
4970 static void
4971 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4972                             struct iv_use *use, struct iv_cand *cand)
4973 {
4974   tree comp;
4975   tree op, tgt, ass;
4976   block_stmt_iterator bsi;
4977
4978   /* An important special case -- if we are asked to express value of
4979      the original iv by itself, just exit; there is no need to
4980      introduce a new computation (that might also need casting the
4981      variable to unsigned and back).  */
4982   if (cand->pos == IP_ORIGINAL
4983       && cand->incremented_at == use->stmt)
4984     {
4985       tree step, ctype, utype;
4986       enum tree_code incr_code = PLUS_EXPR;
4987
4988       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4989       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4990
4991       step = cand->iv->step;
4992       ctype = TREE_TYPE (step);
4993       utype = TREE_TYPE (cand->var_after);
4994       if (TREE_CODE (step) == NEGATE_EXPR)
4995         {
4996           incr_code = MINUS_EXPR;
4997           step = TREE_OPERAND (step, 0);
4998         }
4999
5000       /* Check whether we may leave the computation unchanged.
5001          This is the case only if it does not rely on other
5002          computations in the loop -- otherwise, the computation
5003          we rely upon may be removed in remove_unused_ivs,
5004          thus leading to ICE.  */
5005       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
5006       if (TREE_CODE (op) == PLUS_EXPR
5007           || TREE_CODE (op) == MINUS_EXPR
5008           || TREE_CODE (op) == POINTER_PLUS_EXPR)
5009         {
5010           if (TREE_OPERAND (op, 0) == cand->var_before)
5011             op = TREE_OPERAND (op, 1);
5012           else if (TREE_CODE (op) != MINUS_EXPR
5013                    && TREE_OPERAND (op, 1) == cand->var_before)
5014             op = TREE_OPERAND (op, 0);
5015           else
5016             op = NULL_TREE;
5017         }
5018       else
5019         op = NULL_TREE;
5020
5021       if (op
5022           && (TREE_CODE (op) == INTEGER_CST
5023               || operand_equal_p (op, step, 0)))
5024         return;
5025
5026       /* Otherwise, add the necessary computations to express
5027          the iv.  */
5028       op = fold_convert (ctype, cand->var_before);
5029       comp = fold_convert (utype,
5030                            build2 (incr_code, ctype, op,
5031                                    unshare_expr (step)));
5032     }
5033   else
5034     {
5035       comp = get_computation (data->current_loop, use, cand);
5036       gcc_assert (comp != NULL_TREE);
5037     }
5038
5039   switch (TREE_CODE (use->stmt))
5040     {
5041     case PHI_NODE:
5042       tgt = PHI_RESULT (use->stmt);
5043
5044       /* If we should keep the biv, do not replace it.  */
5045       if (name_info (data, tgt)->preserve_biv)
5046         return;
5047
5048       bsi = bsi_after_labels (bb_for_stmt (use->stmt));
5049       break;
5050
5051     case GIMPLE_MODIFY_STMT:
5052       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
5053       bsi = bsi_for_stmt (use->stmt);
5054       break;
5055
5056     default:
5057       gcc_unreachable ();
5058     }
5059
5060   op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5061                                  true, BSI_SAME_STMT);
5062
5063   if (TREE_CODE (use->stmt) == PHI_NODE)
5064     {
5065       ass = build_gimple_modify_stmt (tgt, op);
5066       bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
5067       remove_statement (use->stmt, false);
5068       SSA_NAME_DEF_STMT (tgt) = ass;
5069     }
5070   else
5071     GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
5072 }
5073
5074 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5075    for_each_index.  */
5076
5077 static bool
5078 idx_remove_ssa_names (tree base, tree *idx,
5079                       void *data ATTRIBUTE_UNUSED)
5080 {
5081   tree *op;
5082
5083   if (TREE_CODE (*idx) == SSA_NAME)
5084     *idx = SSA_NAME_VAR (*idx);
5085
5086   if (TREE_CODE (base) == ARRAY_REF)
5087     {
5088       op = &TREE_OPERAND (base, 2);
5089       if (*op
5090           && TREE_CODE (*op) == SSA_NAME)
5091         *op = SSA_NAME_VAR (*op);
5092       op = &TREE_OPERAND (base, 3);
5093       if (*op
5094           && TREE_CODE (*op) == SSA_NAME)
5095         *op = SSA_NAME_VAR (*op);
5096     }
5097
5098   return true;
5099 }
5100
5101 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5102
5103 static tree
5104 unshare_and_remove_ssa_names (tree ref)
5105 {
5106   ref = unshare_expr (ref);
5107   for_each_index (&ref, idx_remove_ssa_names, NULL);
5108
5109   return ref;
5110 }
5111
5112 /* Extract the alias analysis info for the memory reference REF.  There are
5113    several ways how this information may be stored and what precisely is
5114    its semantics depending on the type of the reference, but there always is
5115    somewhere hidden one _DECL node that is used to determine the set of
5116    virtual operands for the reference.  The code below deciphers this jungle
5117    and extracts this single useful piece of information.  */
5118
5119 static tree
5120 get_ref_tag (tree ref, tree orig)
5121 {
5122   tree var = get_base_address (ref);
5123   tree aref = NULL_TREE, tag, sv;
5124   HOST_WIDE_INT offset, size, maxsize;
5125
5126   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5127     {
5128       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5129       if (ref)
5130         break;
5131     }
5132
5133   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5134     return aref;
5135
5136   if (!var)
5137     return NULL_TREE;
5138
5139   if (TREE_CODE (var) == INDIRECT_REF)
5140     {
5141       /* If the base is a dereference of a pointer, first check its name memory
5142          tag.  If it does not have one, use its symbol memory tag.  */
5143       var = TREE_OPERAND (var, 0);
5144       if (TREE_CODE (var) != SSA_NAME)
5145         return NULL_TREE;
5146
5147       if (SSA_NAME_PTR_INFO (var))
5148         {
5149           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5150           if (tag)
5151             return tag;
5152         }
5153
5154       var = SSA_NAME_VAR (var);
5155       tag = symbol_mem_tag (var);
5156       gcc_assert (tag != NULL_TREE);
5157       return tag;
5158     }
5159   else
5160     {
5161       if (!DECL_P (var))
5162         return NULL_TREE;
5163
5164       tag = symbol_mem_tag (var);
5165       if (tag)
5166         return tag;
5167
5168       return var;
5169     }
5170 }
5171
5172 /* Copies the reference information from OLD_REF to NEW_REF.  */
5173
5174 static void
5175 copy_ref_info (tree new_ref, tree old_ref)
5176 {
5177   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5178     copy_mem_ref_info (new_ref, old_ref);
5179   else
5180     {
5181       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5182       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5183     }
5184 }
5185
5186 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5187
5188 static void
5189 rewrite_use_address (struct ivopts_data *data,
5190                      struct iv_use *use, struct iv_cand *cand)
5191 {
5192   aff_tree aff;
5193   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5194   tree ref;
5195   bool ok;
5196
5197   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5198   gcc_assert (ok);
5199   unshare_aff_combination (&aff);
5200
5201   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5202   copy_ref_info (ref, *use->op_p);
5203   *use->op_p = ref;
5204 }
5205
5206 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5207    candidate CAND.  */
5208
5209 static void
5210 rewrite_use_compare (struct ivopts_data *data,
5211                      struct iv_use *use, struct iv_cand *cand)
5212 {
5213   tree comp, *var_p, op, bound;
5214   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5215   enum tree_code compare;
5216   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5217   bool ok;
5218
5219   bound = cp->value;
5220   if (bound)
5221     {
5222       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5223       tree var_type = TREE_TYPE (var);
5224
5225       compare = iv_elimination_compare (data, use);
5226       bound = unshare_expr (fold_convert (var_type, bound));
5227       op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
5228                                      true, BSI_SAME_STMT);
5229
5230       *use->op_p = build2 (compare, boolean_type_node, var, op);
5231       return;
5232     }
5233
5234   /* The induction variable elimination failed; just express the original
5235      giv.  */
5236   comp = get_computation (data->current_loop, use, cand);
5237   gcc_assert (comp != NULL_TREE);
5238
5239   ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5240   gcc_assert (ok);
5241
5242   *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5243                                      true, BSI_SAME_STMT);
5244 }
5245
5246 /* Rewrites USE using candidate CAND.  */
5247
5248 static void
5249 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5250 {
5251   push_stmt_changes (&use->stmt);
5252
5253   switch (use->type)
5254     {
5255       case USE_NONLINEAR_EXPR:
5256         rewrite_use_nonlinear_expr (data, use, cand);
5257         break;
5258
5259       case USE_ADDRESS:
5260         rewrite_use_address (data, use, cand);
5261         break;
5262
5263       case USE_COMPARE:
5264         rewrite_use_compare (data, use, cand);
5265         break;
5266
5267       default:
5268         gcc_unreachable ();
5269     }
5270
5271   pop_stmt_changes (&use->stmt);
5272 }
5273
5274 /* Rewrite the uses using the selected induction variables.  */
5275
5276 static void
5277 rewrite_uses (struct ivopts_data *data)
5278 {
5279   unsigned i;
5280   struct iv_cand *cand;
5281   struct iv_use *use;
5282
5283   for (i = 0; i < n_iv_uses (data); i++)
5284     {
5285       use = iv_use (data, i);
5286       cand = use->selected;
5287       gcc_assert (cand);
5288
5289       rewrite_use (data, use, cand);
5290     }
5291 }
5292
5293 /* Removes the ivs that are not used after rewriting.  */
5294
5295 static void
5296 remove_unused_ivs (struct ivopts_data *data)
5297 {
5298   unsigned j;
5299   bitmap_iterator bi;
5300
5301   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5302     {
5303       struct version_info *info;
5304
5305       info = ver_info (data, j);
5306       if (info->iv
5307           && !integer_zerop (info->iv->step)
5308           && !info->inv_id
5309           && !info->iv->have_use_for
5310           && !info->preserve_biv)
5311         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5312     }
5313 }
5314
5315 /* Frees data allocated by the optimization of a single loop.  */
5316
5317 static void
5318 free_loop_data (struct ivopts_data *data)
5319 {
5320   unsigned i, j;
5321   bitmap_iterator bi;
5322   tree obj;
5323
5324   if (data->niters)
5325     {
5326       pointer_map_destroy (data->niters);
5327       data->niters = NULL;
5328     }
5329
5330   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5331     {
5332       struct version_info *info;
5333
5334       info = ver_info (data, i);
5335       if (info->iv)
5336         free (info->iv);
5337       info->iv = NULL;
5338       info->has_nonlin_use = false;
5339       info->preserve_biv = false;
5340       info->inv_id = 0;
5341     }
5342   bitmap_clear (data->relevant);
5343   bitmap_clear (data->important_candidates);
5344
5345   for (i = 0; i < n_iv_uses (data); i++)
5346     {
5347       struct iv_use *use = iv_use (data, i);
5348
5349       free (use->iv);
5350       BITMAP_FREE (use->related_cands);
5351       for (j = 0; j < use->n_map_members; j++)
5352         if (use->cost_map[j].depends_on)
5353           BITMAP_FREE (use->cost_map[j].depends_on);
5354       free (use->cost_map);
5355       free (use);
5356     }
5357   VEC_truncate (iv_use_p, data->iv_uses, 0);
5358
5359   for (i = 0; i < n_iv_cands (data); i++)
5360     {
5361       struct iv_cand *cand = iv_cand (data, i);
5362
5363       if (cand->iv)
5364         free (cand->iv);
5365       if (cand->depends_on)
5366         BITMAP_FREE (cand->depends_on);
5367       free (cand);
5368     }
5369   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5370
5371   if (data->version_info_size < num_ssa_names)
5372     {
5373       data->version_info_size = 2 * num_ssa_names;
5374       free (data->version_info);
5375       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5376     }
5377
5378   data->max_inv_id = 0;
5379
5380   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5381     SET_DECL_RTL (obj, NULL_RTX);
5382
5383   VEC_truncate (tree, decl_rtl_to_reset, 0);
5384 }
5385
5386 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5387    loop tree.  */
5388
5389 static void
5390 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5391 {
5392   free_loop_data (data);
5393   free (data->version_info);
5394   BITMAP_FREE (data->relevant);
5395   BITMAP_FREE (data->important_candidates);
5396
5397   VEC_free (tree, heap, decl_rtl_to_reset);
5398   VEC_free (iv_use_p, heap, data->iv_uses);
5399   VEC_free (iv_cand_p, heap, data->iv_candidates);
5400 }
5401
5402 /* Optimizes the LOOP.  Returns true if anything changed.  */
5403
5404 static bool
5405 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5406 {
5407   bool changed = false;
5408   struct iv_ca *iv_ca;
5409   edge exit;
5410
5411   gcc_assert (!data->niters);
5412   data->current_loop = loop;
5413
5414   if (dump_file && (dump_flags & TDF_DETAILS))
5415     {
5416       fprintf (dump_file, "Processing loop %d\n", loop->num);
5417
5418       exit = single_dom_exit (loop);
5419       if (exit)
5420         {
5421           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5422                    exit->src->index, exit->dest->index);
5423           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5424           fprintf (dump_file, "\n");
5425         }
5426
5427       fprintf (dump_file, "\n");
5428     }
5429
5430   /* For each ssa name determines whether it behaves as an induction variable
5431      in some loop.  */
5432   if (!find_induction_variables (data))
5433     goto finish;
5434
5435   /* Finds interesting uses (item 1).  */
5436   find_interesting_uses (data);
5437   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5438     goto finish;
5439
5440   /* Finds candidates for the induction variables (item 2).  */
5441   find_iv_candidates (data);
5442
5443   /* Calculates the costs (item 3, part 1).  */
5444   determine_use_iv_costs (data);
5445   determine_iv_costs (data);
5446   determine_set_costs (data);
5447
5448   /* Find the optimal set of induction variables (item 3, part 2).  */
5449   iv_ca = find_optimal_iv_set (data);
5450   if (!iv_ca)
5451     goto finish;
5452   changed = true;
5453
5454   /* Create the new induction variables (item 4, part 1).  */
5455   create_new_ivs (data, iv_ca);
5456   iv_ca_free (&iv_ca);
5457
5458   /* Rewrite the uses (item 4, part 2).  */
5459   rewrite_uses (data);
5460
5461   /* Remove the ivs that are unused after rewriting.  */
5462   remove_unused_ivs (data);
5463
5464   /* We have changed the structure of induction variables; it might happen
5465      that definitions in the scev database refer to some of them that were
5466      eliminated.  */
5467   scev_reset ();
5468
5469 finish:
5470   free_loop_data (data);
5471
5472   return changed;
5473 }
5474
5475 /* Main entry point.  Optimizes induction variables in loops.  */
5476
5477 void
5478 tree_ssa_iv_optimize (void)
5479 {
5480   struct loop *loop;
5481   struct ivopts_data data;
5482   loop_iterator li;
5483
5484   tree_ssa_iv_optimize_init (&data);
5485
5486   /* Optimize the loops starting with the innermost ones.  */
5487   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5488     {
5489       if (dump_file && (dump_flags & TDF_DETAILS))
5490         flow_loop_dump (loop, dump_file, NULL, 1);
5491
5492       tree_ssa_iv_optimize_loop (&data, loop);
5493     }
5494
5495   tree_ssa_iv_optimize_finalize (&data);
5496 }