gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "rtl.h"
  70 #include "tm_p.h"
  71 #include "hard-reg-set.h"
  72 #include "basic-block.h"
  73 #include "output.h"
  74 #include "diagnostic.h"
  75 #include "tree-flow.h"
  76 #include "tree-dump.h"
  77 #include "timevar.h"
  78 #include "cfgloop.h"
  79 #include "varray.h"
  80 #include "expr.h"
  81 #include "tree-pass.h"
  82 #include "ggc.h"
  83 #include "insn-config.h"
  84 #include "recog.h"
  85 #include "pointer-set.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92 #include "tree-affine.h"
  93 #include "target.h"
  94
  95 /* The infinite cost.  */
  96 #define INFTY 10000000
  97
  98 /* The expected number of loop iterations.  TODO -- use profiling instead of
  99    this.  */
 100 #define AVG_LOOP_NITER(LOOP) 5
 101
 102
 103 /* Representation of the induction variable.  */
 104 struct iv
 105 {
 106   tree base;            /* Initial value of the iv.  */
 107   tree base_object;     /* A memory object to that the induction variable points.  */
 108   tree step;            /* Step of the iv (constant only).  */
 109   tree ssa_name;        /* The ssa name with the value.  */
 110   bool biv_p;           /* Is it a biv?  */
 111   bool have_use_for;    /* Do we already have a use for it?  */
 112   unsigned use_id;      /* The identifier in the use if it is the case.  */
 113 };
 114
 115 /* Per-ssa version information (induction variable descriptions, etc.).  */
 116 struct version_info
 117 {
 118   tree name;            /* The ssa name.  */
 119   struct iv *iv;        /* Induction variable description.  */
 120   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 121                            an expression that is not an induction variable.  */
 122   unsigned inv_id;      /* Id of an invariant.  */
 123   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 124 };
 125
 126 /* Types of uses.  */
 127 enum use_type
 128 {
 129   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 130   USE_ADDRESS,          /* Use in an address.  */
 131   USE_COMPARE           /* Use is a compare.  */
 132 };
 133
 134 /* Cost of a computation.  */
 135 typedef struct
 136 {
 137   unsigned cost;        /* The runtime cost.  */
 138   unsigned complexity;  /* The estimate of the complexity of the code for
 139                            the computation (in no concrete units --
 140                            complexity field should be larger for more
 141                            complex expressions and addressing modes).  */
 142 } comp_cost;
 143
 144 static const comp_cost zero_cost = {0, 0};
 145 static const comp_cost infinite_cost = {INFTY, INFTY};
 146
 147 /* The candidate - cost pair.  */
 148 struct cost_pair
 149 {
 150   struct iv_cand *cand; /* The candidate.  */
 151   comp_cost cost;       /* The cost.  */
 152   bitmap depends_on;    /* The list of invariants that have to be
 153                            preserved.  */
 154   tree value;           /* For final value elimination, the expression for
 155                            the final value of the iv.  For iv elimination,
 156                            the new bound to compare with.  */
 157 };
 158
 159 /* Use.  */
 160 struct iv_use
 161 {
 162   unsigned id;          /* The id of the use.  */
 163   enum use_type type;   /* Type of the use.  */
 164   struct iv *iv;        /* The induction variable it is based on.  */
 165   tree stmt;            /* Statement in that it occurs.  */
 166   tree *op_p;           /* The place where it occurs.  */
 167   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 168                            important ones.  */
 169
 170   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 171   struct cost_pair *cost_map;
 172                         /* The costs wrto the iv candidates.  */
 173
 174   struct iv_cand *selected;
 175                         /* The selected candidate.  */
 176 };
 177
 178 /* The position where the iv is computed.  */
 179 enum iv_position
 180 {
 181   IP_NORMAL,            /* At the end, just before the exit condition.  */
 182   IP_END,               /* At the end of the latch block.  */
 183   IP_ORIGINAL           /* The original biv.  */
 184 };
 185
 186 /* The induction variable candidate.  */
 187 struct iv_cand
 188 {
 189   unsigned id;          /* The number of the candidate.  */
 190   bool important;       /* Whether this is an "important" candidate, i.e. such
 191                            that it should be considered by all uses.  */
 192   enum iv_position pos; /* Where it is computed.  */
 193   tree incremented_at;  /* For original biv, the statement where it is
 194                            incremented.  */
 195   tree var_before;      /* The variable used for it before increment.  */
 196   tree var_after;       /* The variable used for it after increment.  */
 197   struct iv *iv;        /* The value of the candidate.  NULL for
 198                            "pseudocandidate" used to indicate the possibility
 199                            to replace the final value of an iv by direct
 200                            computation of the value.  */
 201   unsigned cost;        /* Cost of the candidate.  */
 202   bitmap depends_on;    /* The list of invariants that are used in step of the
 203                            biv.  */
 204 };
 205
 206 /* The data used by the induction variable optimizations.  */
 207
 208 typedef struct iv_use *iv_use_p;
 209 DEF_VEC_P(iv_use_p);
 210 DEF_VEC_ALLOC_P(iv_use_p,heap);
 211
 212 typedef struct iv_cand *iv_cand_p;
 213 DEF_VEC_P(iv_cand_p);
 214 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 215
 216 struct ivopts_data
 217 {
 218   /* The currently optimized loop.  */
 219   struct loop *current_loop;
 220
 221   /* Number of registers used in it.  */
 222   unsigned regs_used;
 223
 224   /* Numbers of iterations for all exits of the current loop.  */
 225   struct pointer_map_t *niters;
 226
 227   /* The size of version_info array allocated.  */
 228   unsigned version_info_size;
 229
 230   /* The array of information for the ssa names.  */
 231   struct version_info *version_info;
 232
 233   /* The bitmap of indices in version_info whose value was changed.  */
 234   bitmap relevant;
 235
 236   /* The maximum invariant id.  */
 237   unsigned max_inv_id;
 238
 239   /* The uses of induction variables.  */
 240   VEC(iv_use_p,heap) *iv_uses;
 241
 242   /* The candidates.  */
 243   VEC(iv_cand_p,heap) *iv_candidates;
 244
 245   /* A bitmap of important candidates.  */
 246   bitmap important_candidates;
 247
 248   /* Whether to consider just related and important candidates when replacing a
 249      use.  */
 250   bool consider_all_candidates;
 251 };
 252
 253 /* An assignment of iv candidates to uses.  */
 254
 255 struct iv_ca
 256 {
 257   /* The number of uses covered by the assignment.  */
 258   unsigned upto;
 259
 260   /* Number of uses that cannot be expressed by the candidates in the set.  */
 261   unsigned bad_uses;
 262
 263   /* Candidate assigned to a use, together with the related costs.  */
 264   struct cost_pair **cand_for_use;
 265
 266   /* Number of times each candidate is used.  */
 267   unsigned *n_cand_uses;
 268
 269   /* The candidates used.  */
 270   bitmap cands;
 271
 272   /* The number of candidates in the set.  */
 273   unsigned n_cands;
 274
 275   /* Total number of registers needed.  */
 276   unsigned n_regs;
 277
 278   /* Total cost of expressing uses.  */
 279   comp_cost cand_use_cost;
 280
 281   /* Total cost of candidates.  */
 282   unsigned cand_cost;
 283
 284   /* Number of times each invariant is used.  */
 285   unsigned *n_invariant_uses;
 286
 287   /* Total cost of the assignment.  */
 288   comp_cost cost;
 289 };
 290
 291 /* Difference of two iv candidate assignments.  */
 292
 293 struct iv_ca_delta
 294 {
 295   /* Changed use.  */
 296   struct iv_use *use;
 297
 298   /* An old assignment (for rollback purposes).  */
 299   struct cost_pair *old_cp;
 300
 301   /* A new assignment.  */
 302   struct cost_pair *new_cp;
 303
 304   /* Next change in the list.  */
 305   struct iv_ca_delta *next_change;
 306 };
 307
 308 /* Bound on number of candidates below that all candidates are considered.  */
 309
 310 #define CONSIDER_ALL_CANDIDATES_BOUND \
 311   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 312
 313 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 314    optimizing such a loop would help, and it would take ages).  */
 315
 316 #define MAX_CONSIDERED_USES \
 317   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 318
 319 /* If there are at most this number of ivs in the set, try removing unnecessary
 320    ivs from the set always.  */
 321
 322 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 323   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 324
 325 /* The list of trees for that the decl_rtl field must be reset is stored
 326    here.  */
 327
 328 static VEC(tree,heap) *decl_rtl_to_reset;
 329
 330 /* Number of uses recorded in DATA.  */
 331
 332 static inline unsigned
 333 n_iv_uses (struct ivopts_data *data)
 334 {
 335   return VEC_length (iv_use_p, data->iv_uses);
 336 }
 337
 338 /* Ith use recorded in DATA.  */
 339
 340 static inline struct iv_use *
 341 iv_use (struct ivopts_data *data, unsigned i)
 342 {
 343   return VEC_index (iv_use_p, data->iv_uses, i);
 344 }
 345
 346 /* Number of candidates recorded in DATA.  */
 347
 348 static inline unsigned
 349 n_iv_cands (struct ivopts_data *data)
 350 {
 351   return VEC_length (iv_cand_p, data->iv_candidates);
 352 }
 353
 354 /* Ith candidate recorded in DATA.  */
 355
 356 static inline struct iv_cand *
 357 iv_cand (struct ivopts_data *data, unsigned i)
 358 {
 359   return VEC_index (iv_cand_p, data->iv_candidates, i);
 360 }
 361
 362 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 363
 364 edge
 365 single_dom_exit (struct loop *loop)
 366 {
 367   edge exit = single_exit (loop);
 368
 369   if (!exit)
 370     return NULL;
 371
 372   if (!just_once_each_iteration_p (loop, exit->src))
 373     return NULL;
 374
 375   return exit;
 376 }
 377
 378 /* Dumps information about the induction variable IV to FILE.  */
 379
 380 extern void dump_iv (FILE *, struct iv *);
 381 void
 382 dump_iv (FILE *file, struct iv *iv)
 383 {
 384   if (iv->ssa_name)
 385     {
 386       fprintf (file, "ssa name ");
 387       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 388       fprintf (file, "\n");
 389     }
 390
 391   fprintf (file, "  type ");
 392   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 393   fprintf (file, "\n");
 394
 395   if (iv->step)
 396     {
 397       fprintf (file, "  base ");
 398       print_generic_expr (file, iv->base, TDF_SLIM);
 399       fprintf (file, "\n");
 400
 401       fprintf (file, "  step ");
 402       print_generic_expr (file, iv->step, TDF_SLIM);
 403       fprintf (file, "\n");
 404     }
 405   else
 406     {
 407       fprintf (file, "  invariant ");
 408       print_generic_expr (file, iv->base, TDF_SLIM);
 409       fprintf (file, "\n");
 410     }
 411
 412   if (iv->base_object)
 413     {
 414       fprintf (file, "  base object ");
 415       print_generic_expr (file, iv->base_object, TDF_SLIM);
 416       fprintf (file, "\n");
 417     }
 418
 419   if (iv->biv_p)
 420     fprintf (file, "  is a biv\n");
 421 }
 422
 423 /* Dumps information about the USE to FILE.  */
 424
 425 extern void dump_use (FILE *, struct iv_use *);
 426 void
 427 dump_use (FILE *file, struct iv_use *use)
 428 {
 429   fprintf (file, "use %d\n", use->id);
 430
 431   switch (use->type)
 432     {
 433     case USE_NONLINEAR_EXPR:
 434       fprintf (file, "  generic\n");
 435       break;
 436
 437     case USE_ADDRESS:
 438       fprintf (file, "  address\n");
 439       break;
 440
 441     case USE_COMPARE:
 442       fprintf (file, "  compare\n");
 443       break;
 444
 445     default:
 446       gcc_unreachable ();
 447     }
 448
 449   fprintf (file, "  in statement ");
 450   print_generic_expr (file, use->stmt, TDF_SLIM);
 451   fprintf (file, "\n");
 452
 453   fprintf (file, "  at position ");
 454   if (use->op_p)
 455     print_generic_expr (file, *use->op_p, TDF_SLIM);
 456   fprintf (file, "\n");
 457
 458   dump_iv (file, use->iv);
 459
 460   if (use->related_cands)
 461     {
 462       fprintf (file, "  related candidates ");
 463       dump_bitmap (file, use->related_cands);
 464     }
 465 }
 466
 467 /* Dumps information about the uses to FILE.  */
 468
 469 extern void dump_uses (FILE *, struct ivopts_data *);
 470 void
 471 dump_uses (FILE *file, struct ivopts_data *data)
 472 {
 473   unsigned i;
 474   struct iv_use *use;
 475
 476   for (i = 0; i < n_iv_uses (data); i++)
 477     {
 478       use = iv_use (data, i);
 479
 480       dump_use (file, use);
 481       fprintf (file, "\n");
 482     }
 483 }
 484
 485 /* Dumps information about induction variable candidate CAND to FILE.  */
 486
 487 extern void dump_cand (FILE *, struct iv_cand *);
 488 void
 489 dump_cand (FILE *file, struct iv_cand *cand)
 490 {
 491   struct iv *iv = cand->iv;
 492
 493   fprintf (file, "candidate %d%s\n",
 494            cand->id, cand->important ? " (important)" : "");
 495
 496   if (cand->depends_on)
 497     {
 498       fprintf (file, "  depends on ");
 499       dump_bitmap (file, cand->depends_on);
 500     }
 501
 502   if (!iv)
 503     {
 504       fprintf (file, "  final value replacement\n");
 505       return;
 506     }
 507
 508   switch (cand->pos)
 509     {
 510     case IP_NORMAL:
 511       fprintf (file, "  incremented before exit test\n");
 512       break;
 513
 514     case IP_END:
 515       fprintf (file, "  incremented at end\n");
 516       break;
 517
 518     case IP_ORIGINAL:
 519       fprintf (file, "  original biv\n");
 520       break;
 521     }
 522
 523   dump_iv (file, iv);
 524 }
 525
 526 /* Returns the info for ssa version VER.  */
 527
 528 static inline struct version_info *
 529 ver_info (struct ivopts_data *data, unsigned ver)
 530 {
 531   return data->version_info + ver;
 532 }
 533
 534 /* Returns the info for ssa name NAME.  */
 535
 536 static inline struct version_info *
 537 name_info (struct ivopts_data *data, tree name)
 538 {
 539   return ver_info (data, SSA_NAME_VERSION (name));
 540 }
 541
 542 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 543    emitted in LOOP.  */
 544
 545 static bool
 546 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 547 {
 548   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 549
 550   gcc_assert (bb);
 551
 552   if (sbb == loop->latch)
 553     return true;
 554
 555   if (sbb != bb)
 556     return false;
 557
 558   return stmt == last_stmt (bb);
 559 }
 560
 561 /* Returns true if STMT if after the place where the original induction
 562    variable CAND is incremented.  */
 563
 564 static bool
 565 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 566 {
 567   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 568   basic_block stmt_bb = bb_for_stmt (stmt);
 569   block_stmt_iterator bsi;
 570
 571   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 572     return false;
 573
 574   if (stmt_bb != cand_bb)
 575     return true;
 576
 577   /* Scan the block from the end, since the original ivs are usually
 578      incremented at the end of the loop body.  */
 579   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 580     {
 581       if (bsi_stmt (bsi) == cand->incremented_at)
 582         return false;
 583       if (bsi_stmt (bsi) == stmt)
 584         return true;
 585     }
 586 }
 587
 588 /* Returns true if STMT if after the place where the induction variable
 589    CAND is incremented in LOOP.  */
 590
 591 static bool
 592 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 593 {
 594   switch (cand->pos)
 595     {
 596     case IP_END:
 597       return false;
 598
 599     case IP_NORMAL:
 600       return stmt_after_ip_normal_pos (loop, stmt);
 601
 602     case IP_ORIGINAL:
 603       return stmt_after_ip_original_pos (cand, stmt);
 604
 605     default:
 606       gcc_unreachable ();
 607     }
 608 }
 609
 610 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 611
 612 static bool
 613 abnormal_ssa_name_p (tree exp)
 614 {
 615   if (!exp)
 616     return false;
 617
 618   if (TREE_CODE (exp) != SSA_NAME)
 619     return false;
 620
 621   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 622 }
 623
 624 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 625    abnormal phi node.  Callback for for_each_index.  */
 626
 627 static bool
 628 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 629                                   void *data ATTRIBUTE_UNUSED)
 630 {
 631   if (TREE_CODE (base) == ARRAY_REF)
 632     {
 633       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 634         return false;
 635       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 636         return false;
 637     }
 638
 639   return !abnormal_ssa_name_p (*index);
 640 }
 641
 642 /* Returns true if EXPR contains a ssa name that occurs in an
 643    abnormal phi node.  */
 644
 645 bool
 646 contains_abnormal_ssa_name_p (tree expr)
 647 {
 648   enum tree_code code;
 649   enum tree_code_class codeclass;
 650
 651   if (!expr)
 652     return false;
 653
 654   code = TREE_CODE (expr);
 655   codeclass = TREE_CODE_CLASS (code);
 656
 657   if (code == SSA_NAME)
 658     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 659
 660   if (code == INTEGER_CST
 661       || is_gimple_min_invariant (expr))
 662     return false;
 663
 664   if (code == ADDR_EXPR)
 665     return !for_each_index (&TREE_OPERAND (expr, 0),
 666                             idx_contains_abnormal_ssa_name_p,
 667                             NULL);
 668
 669   switch (codeclass)
 670     {
 671     case tcc_binary:
 672     case tcc_comparison:
 673       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 674         return true;
 675
 676       /* Fallthru.  */
 677     case tcc_unary:
 678       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 679         return true;
 680
 681       break;
 682
 683     default:
 684       gcc_unreachable ();
 685     }
 686
 687   return false;
 688 }
 689
 690 /*  Returns tree describing number of iterations determined from
 691     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 692
 693 static tree
 694 niter_for_exit (struct ivopts_data *data, edge exit)
 695 {
 696   struct tree_niter_desc desc;
 697   tree niter;
 698   void **slot;
 699
 700   if (!data->niters)
 701     {
 702       data->niters = pointer_map_create ();
 703       slot = NULL;
 704     }
 705   else
 706     slot = pointer_map_contains (data->niters, exit);
 707
 708   if (!slot)
 709     {
 710       /* Try to determine number of iterations.  We must know it
 711          unconditionally (i.e., without possibility of # of iterations
 712          being zero).  Also, we cannot safely work with ssa names that
 713          appear in phi nodes on abnormal edges, so that we do not create
 714          overlapping life ranges for them (PR 27283).  */
 715       if (number_of_iterations_exit (data->current_loop,
 716                                      exit, &desc, true)
 717           && integer_zerop (desc.may_be_zero)
 718           && !contains_abnormal_ssa_name_p (desc.niter))
 719         niter = desc.niter;
 720       else
 721         niter = NULL_TREE;
 722
 723       *pointer_map_insert (data->niters, exit) = niter;
 724     }
 725   else
 726     niter = (tree) *slot;
 727
 728   return niter;
 729 }
 730
 731 /* Returns tree describing number of iterations determined from
 732    single dominating exit of DATA->current_loop, or NULL if something
 733    goes wrong.  */
 734
 735 static tree
 736 niter_for_single_dom_exit (struct ivopts_data *data)
 737 {
 738   edge exit = single_dom_exit (data->current_loop);
 739
 740   if (!exit)
 741     return NULL;
 742
 743   return niter_for_exit (data, exit);
 744 }
 745
 746 /* Initializes data structures used by the iv optimization pass, stored
 747    in DATA.  */
 748
 749 static void
 750 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 751 {
 752   data->version_info_size = 2 * num_ssa_names;
 753   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 754   data->relevant = BITMAP_ALLOC (NULL);
 755   data->important_candidates = BITMAP_ALLOC (NULL);
 756   data->max_inv_id = 0;
 757   data->niters = NULL;
 758   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 759   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 760   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 761 }
 762
 763 /* Returns a memory object to that EXPR points.  In case we are able to
 764    determine that it does not point to any such object, NULL is returned.  */
 765
 766 static tree
 767 determine_base_object (tree expr)
 768 {
 769   enum tree_code code = TREE_CODE (expr);
 770   tree base, obj;
 771
 772   /* If this is a pointer casted to any type, we need to determine
 773      the base object for the pointer; so handle conversions before
 774      throwing away non-pointer expressions.  */
 775   if (TREE_CODE (expr) == NOP_EXPR
 776       || TREE_CODE (expr) == CONVERT_EXPR)
 777     return determine_base_object (TREE_OPERAND (expr, 0));
 778
 779   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 780     return NULL_TREE;
 781
 782   switch (code)
 783     {
 784     case INTEGER_CST:
 785       return NULL_TREE;
 786
 787     case ADDR_EXPR:
 788       obj = TREE_OPERAND (expr, 0);
 789       base = get_base_address (obj);
 790
 791       if (!base)
 792         return expr;
 793
 794       if (TREE_CODE (base) == INDIRECT_REF)
 795         return determine_base_object (TREE_OPERAND (base, 0));
 796
 797       return fold_convert (ptr_type_node,
 798                            build_fold_addr_expr (base));
 799
 800     case POINTER_PLUS_EXPR:
 801       return determine_base_object (TREE_OPERAND (expr, 0));
 802
 803     case PLUS_EXPR:
 804     case MINUS_EXPR:
 805       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 806       gcc_unreachable ();
 807
 808     default:
 809       return fold_convert (ptr_type_node, expr);
 810     }
 811 }
 812
 813 /* Allocates an induction variable with given initial value BASE and step STEP
 814    for loop LOOP.  */
 815
 816 static struct iv *
 817 alloc_iv (tree base, tree step)
 818 {
 819   struct iv *iv = XCNEW (struct iv);
 820   gcc_assert (step != NULL_TREE);
 821
 822   iv->base = base;
 823   iv->base_object = determine_base_object (base);
 824   iv->step = step;
 825   iv->biv_p = false;
 826   iv->have_use_for = false;
 827   iv->use_id = 0;
 828   iv->ssa_name = NULL_TREE;
 829
 830   return iv;
 831 }
 832
 833 /* Sets STEP and BASE for induction variable IV.  */
 834
 835 static void
 836 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 837 {
 838   struct version_info *info = name_info (data, iv);
 839
 840   gcc_assert (!info->iv);
 841
 842   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 843   info->iv = alloc_iv (base, step);
 844   info->iv->ssa_name = iv;
 845 }
 846
 847 /* Finds induction variable declaration for VAR.  */
 848
 849 static struct iv *
 850 get_iv (struct ivopts_data *data, tree var)
 851 {
 852   basic_block bb;
 853   tree type = TREE_TYPE (var);
 854
 855   if (!POINTER_TYPE_P (type)
 856       && !INTEGRAL_TYPE_P (type))
 857     return NULL;
 858
 859   if (!name_info (data, var)->iv)
 860     {
 861       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 862
 863       if (!bb
 864           || !flow_bb_inside_loop_p (data->current_loop, bb))
 865         set_iv (data, var, var, build_int_cst (type, 0));
 866     }
 867
 868   return name_info (data, var)->iv;
 869 }
 870
 871 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 872    not define a simple affine biv with nonzero step.  */
 873
 874 static tree
 875 determine_biv_step (tree phi)
 876 {
 877   struct loop *loop = bb_for_stmt (phi)->loop_father;
 878   tree name = PHI_RESULT (phi);
 879   affine_iv iv;
 880
 881   if (!is_gimple_reg (name))
 882     return NULL_TREE;
 883
 884   if (!simple_iv (loop, phi, name, &iv, true))
 885     return NULL_TREE;
 886
 887   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 888 }
 889
 890 /* Finds basic ivs.  */
 891
 892 static bool
 893 find_bivs (struct ivopts_data *data)
 894 {
 895   tree phi, step, type, base;
 896   bool found = false;
 897   struct loop *loop = data->current_loop;
 898
 899   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 900     {
 901       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 902         continue;
 903
 904       step = determine_biv_step (phi);
 905       if (!step)
 906         continue;
 907
 908       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 909       base = expand_simple_operations (base);
 910       if (contains_abnormal_ssa_name_p (base)
 911           || contains_abnormal_ssa_name_p (step))
 912         continue;
 913
 914       type = TREE_TYPE (PHI_RESULT (phi));
 915       base = fold_convert (type, base);
 916       if (step)
 917         step = fold_convert (type, step);
 918
 919       set_iv (data, PHI_RESULT (phi), base, step);
 920       found = true;
 921     }
 922
 923   return found;
 924 }
 925
 926 /* Marks basic ivs.  */
 927
 928 static void
 929 mark_bivs (struct ivopts_data *data)
 930 {
 931   tree phi, var;
 932   struct iv *iv, *incr_iv;
 933   struct loop *loop = data->current_loop;
 934   basic_block incr_bb;
 935
 936   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 937     {
 938       iv = get_iv (data, PHI_RESULT (phi));
 939       if (!iv)
 940         continue;
 941
 942       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 943       incr_iv = get_iv (data, var);
 944       if (!incr_iv)
 945         continue;
 946
 947       /* If the increment is in the subloop, ignore it.  */
 948       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 949       if (incr_bb->loop_father != data->current_loop
 950           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 951         continue;
 952
 953       iv->biv_p = true;
 954       incr_iv->biv_p = true;
 955     }
 956 }
 957
 958 /* Checks whether STMT defines a linear induction variable and stores its
 959    parameters to IV.  */
 960
 961 static bool
 962 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 963 {
 964   tree lhs;
 965   struct loop *loop = data->current_loop;
 966
 967   iv->base = NULL_TREE;
 968   iv->step = NULL_TREE;
 969
 970   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 971     return false;
 972
 973   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 974   if (TREE_CODE (lhs) != SSA_NAME)
 975     return false;
 976
 977   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
 978     return false;
 979   iv->base = expand_simple_operations (iv->base);
 980
 981   if (contains_abnormal_ssa_name_p (iv->base)
 982       || contains_abnormal_ssa_name_p (iv->step))
 983     return false;
 984
 985   return true;
 986 }
 987
 988 /* Finds general ivs in statement STMT.  */
 989
 990 static void
 991 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
 992 {
 993   affine_iv iv;
 994
 995   if (!find_givs_in_stmt_scev (data, stmt, &iv))
 996     return;
 997
 998   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
 999 }
1000
1001 /* Finds general ivs in basic block BB.  */
1002
1003 static void
1004 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1005 {
1006   block_stmt_iterator bsi;
1007
1008   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1009     find_givs_in_stmt (data, bsi_stmt (bsi));
1010 }
1011
1012 /* Finds general ivs.  */
1013
1014 static void
1015 find_givs (struct ivopts_data *data)
1016 {
1017   struct loop *loop = data->current_loop;
1018   basic_block *body = get_loop_body_in_dom_order (loop);
1019   unsigned i;
1020
1021   for (i = 0; i < loop->num_nodes; i++)
1022     find_givs_in_bb (data, body[i]);
1023   free (body);
1024 }
1025
1026 /* For each ssa name defined in LOOP determines whether it is an induction
1027    variable and if so, its initial value and step.  */
1028
1029 static bool
1030 find_induction_variables (struct ivopts_data *data)
1031 {
1032   unsigned i;
1033   bitmap_iterator bi;
1034
1035   if (!find_bivs (data))
1036     return false;
1037
1038   find_givs (data);
1039   mark_bivs (data);
1040
1041   if (dump_file && (dump_flags & TDF_DETAILS))
1042     {
1043       tree niter = niter_for_single_dom_exit (data);
1044
1045       if (niter)
1046         {
1047           fprintf (dump_file, "  number of iterations ");
1048           print_generic_expr (dump_file, niter, TDF_SLIM);
1049           fprintf (dump_file, "\n\n");
1050         };
1051
1052       fprintf (dump_file, "Induction variables:\n\n");
1053
1054       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1055         {
1056           if (ver_info (data, i)->iv)
1057             dump_iv (dump_file, ver_info (data, i)->iv);
1058         }
1059     }
1060
1061   return true;
1062 }
1063
1064 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1065
1066 static struct iv_use *
1067 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1068             tree stmt, enum use_type use_type)
1069 {
1070   struct iv_use *use = XCNEW (struct iv_use);
1071
1072   use->id = n_iv_uses (data);
1073   use->type = use_type;
1074   use->iv = iv;
1075   use->stmt = stmt;
1076   use->op_p = use_p;
1077   use->related_cands = BITMAP_ALLOC (NULL);
1078
1079   /* To avoid showing ssa name in the dumps, if it was not reset by the
1080      caller.  */
1081   iv->ssa_name = NULL_TREE;
1082
1083   if (dump_file && (dump_flags & TDF_DETAILS))
1084     dump_use (dump_file, use);
1085
1086   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1087
1088   return use;
1089 }
1090
1091 /* Checks whether OP is a loop-level invariant and if so, records it.
1092    NONLINEAR_USE is true if the invariant is used in a way we do not
1093    handle specially.  */
1094
1095 static void
1096 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1097 {
1098   basic_block bb;
1099   struct version_info *info;
1100
1101   if (TREE_CODE (op) != SSA_NAME
1102       || !is_gimple_reg (op))
1103     return;
1104
1105   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1106   if (bb
1107       && flow_bb_inside_loop_p (data->current_loop, bb))
1108     return;
1109
1110   info = name_info (data, op);
1111   info->name = op;
1112   info->has_nonlin_use |= nonlinear_use;
1113   if (!info->inv_id)
1114     info->inv_id = ++data->max_inv_id;
1115   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1116 }
1117
1118 /* Checks whether the use OP is interesting and if so, records it.  */
1119
1120 static struct iv_use *
1121 find_interesting_uses_op (struct ivopts_data *data, tree op)
1122 {
1123   struct iv *iv;
1124   struct iv *civ;
1125   tree stmt;
1126   struct iv_use *use;
1127
1128   if (TREE_CODE (op) != SSA_NAME)
1129     return NULL;
1130
1131   iv = get_iv (data, op);
1132   if (!iv)
1133     return NULL;
1134
1135   if (iv->have_use_for)
1136     {
1137       use = iv_use (data, iv->use_id);
1138
1139       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1140       return use;
1141     }
1142
1143   if (integer_zerop (iv->step))
1144     {
1145       record_invariant (data, op, true);
1146       return NULL;
1147     }
1148   iv->have_use_for = true;
1149
1150   civ = XNEW (struct iv);
1151   *civ = *iv;
1152
1153   stmt = SSA_NAME_DEF_STMT (op);
1154   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1155               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1156
1157   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1158   iv->use_id = use->id;
1159
1160   return use;
1161 }
1162
1163 /* Given a condition *COND_P, checks whether it is a compare of an induction
1164    variable and an invariant.  If this is the case, CONTROL_VAR is set
1165    to location of the iv, BOUND to the location of the invariant,
1166    IV_VAR and IV_BOUND are set to the corresponding induction variable
1167    descriptions, and true is returned.  If this is not the case,
1168    CONTROL_VAR and BOUND are set to the arguments of the condition and
1169    false is returned.  */
1170
1171 static bool
1172 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1173                        tree **control_var, tree **bound,
1174                        struct iv **iv_var, struct iv **iv_bound)
1175 {
1176   /* The nodes returned when COND has just one operand.  Note that you should
1177      not modify anything in BOUND or IV_BOUND because of this.  */
1178   static struct iv const_iv;
1179   static tree zero;
1180   tree cond = *cond_p;
1181   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1182   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1183   bool ret = false;
1184
1185   zero = integer_zero_node;
1186   const_iv.step = integer_zero_node;
1187
1188   if (TREE_CODE (cond) == SSA_NAME)
1189     {
1190       op0 = cond_p;
1191       iv0 = get_iv (data, cond);
1192       ret = (iv0 && !integer_zerop (iv0->step));
1193       goto end;
1194     }
1195
1196   if (!COMPARISON_CLASS_P (cond))
1197     {
1198       op0 = cond_p;
1199       goto end;
1200     }
1201
1202   op0 = &TREE_OPERAND (cond, 0);
1203   op1 = &TREE_OPERAND (cond, 1);
1204   if (TREE_CODE (*op0) == SSA_NAME)
1205     iv0 = get_iv (data, *op0);
1206   if (TREE_CODE (*op1) == SSA_NAME)
1207     iv1 = get_iv (data, *op1);
1208
1209   /* Exactly one of the compared values must be an iv, and the other one must
1210      be an invariant.  */
1211   if (!iv0 || !iv1)
1212     goto end;
1213
1214   if (integer_zerop (iv0->step))
1215     {
1216       /* Control variable may be on the other side.  */
1217       tmp_op = op0; op0 = op1; op1 = tmp_op;
1218       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1219     }
1220   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1221
1222 end:
1223   if (control_var)
1224     *control_var = op0;;
1225   if (iv_var)
1226     *iv_var = iv0;;
1227   if (bound)
1228     *bound = op1;
1229   if (iv_bound)
1230     *iv_bound = iv1;
1231
1232   return ret;
1233 }
1234
1235 /* Checks whether the condition *COND_P in STMT is interesting
1236    and if so, records it.  */
1237
1238 static void
1239 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1240 {
1241   tree *var_p, *bound_p;
1242   struct iv *var_iv, *civ;
1243
1244   if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1245     {
1246       find_interesting_uses_op (data, *var_p);
1247       find_interesting_uses_op (data, *bound_p);
1248       return;
1249     }
1250
1251   civ = XNEW (struct iv);
1252   *civ = *var_iv;
1253   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1254 }
1255
1256 /* Returns true if expression EXPR is obviously invariant in LOOP,
1257    i.e. if all its operands are defined outside of the LOOP.  LOOP
1258    should not be the function body.  */
1259
1260 bool
1261 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1262 {
1263   basic_block def_bb;
1264   unsigned i, len;
1265
1266   gcc_assert (loop_depth (loop) > 0);
1267
1268   if (is_gimple_min_invariant (expr))
1269     return true;
1270
1271   if (TREE_CODE (expr) == SSA_NAME)
1272     {
1273       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1274       if (def_bb
1275           && flow_bb_inside_loop_p (loop, def_bb))
1276         return false;
1277
1278       return true;
1279     }
1280
1281   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1282     return false;
1283
1284   len = TREE_OPERAND_LENGTH (expr);
1285   for (i = 0; i < len; i++)
1286     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1287       return false;
1288
1289   return true;
1290 }
1291
1292 /* Cumulates the steps of indices into DATA and replaces their values with the
1293    initial ones.  Returns false when the value of the index cannot be determined.
1294    Callback for for_each_index.  */
1295
1296 struct ifs_ivopts_data
1297 {
1298   struct ivopts_data *ivopts_data;
1299   tree stmt;
1300   tree step;
1301 };
1302
1303 static bool
1304 idx_find_step (tree base, tree *idx, void *data)
1305 {
1306   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1307   struct iv *iv;
1308   tree step, iv_base, iv_step, lbound, off;
1309   struct loop *loop = dta->ivopts_data->current_loop;
1310
1311   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1312       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1313     return false;
1314
1315   /* If base is a component ref, require that the offset of the reference
1316      be invariant.  */
1317   if (TREE_CODE (base) == COMPONENT_REF)
1318     {
1319       off = component_ref_field_offset (base);
1320       return expr_invariant_in_loop_p (loop, off);
1321     }
1322
1323   /* If base is array, first check whether we will be able to move the
1324      reference out of the loop (in order to take its address in strength
1325      reduction).  In order for this to work we need both lower bound
1326      and step to be loop invariants.  */
1327   if (TREE_CODE (base) == ARRAY_REF)
1328     {
1329       step = array_ref_element_size (base);
1330       lbound = array_ref_low_bound (base);
1331
1332       if (!expr_invariant_in_loop_p (loop, step)
1333           || !expr_invariant_in_loop_p (loop, lbound))
1334         return false;
1335     }
1336
1337   if (TREE_CODE (*idx) != SSA_NAME)
1338     return true;
1339
1340   iv = get_iv (dta->ivopts_data, *idx);
1341   if (!iv)
1342     return false;
1343
1344   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1345           *&x[0], which is not folded and does not trigger the
1346           ARRAY_REF path below.  */
1347   *idx = iv->base;
1348
1349   if (integer_zerop (iv->step))
1350     return true;
1351
1352   if (TREE_CODE (base) == ARRAY_REF)
1353     {
1354       step = array_ref_element_size (base);
1355
1356       /* We only handle addresses whose step is an integer constant.  */
1357       if (TREE_CODE (step) != INTEGER_CST)
1358         return false;
1359     }
1360   else
1361     /* The step for pointer arithmetics already is 1 byte.  */
1362     step = build_int_cst (sizetype, 1);
1363
1364   iv_base = iv->base;
1365   iv_step = iv->step;
1366   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1367                             sizetype, &iv_base, &iv_step, dta->stmt,
1368                             false))
1369     {
1370       /* The index might wrap.  */
1371       return false;
1372     }
1373
1374   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1375   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1376
1377   return true;
1378 }
1379
1380 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1381    object is passed to it in DATA.  */
1382
1383 static bool
1384 idx_record_use (tree base, tree *idx,
1385                 void *vdata)
1386 {
1387   struct ivopts_data *data = (struct ivopts_data *) vdata;
1388   find_interesting_uses_op (data, *idx);
1389   if (TREE_CODE (base) == ARRAY_REF)
1390     {
1391       find_interesting_uses_op (data, array_ref_element_size (base));
1392       find_interesting_uses_op (data, array_ref_low_bound (base));
1393     }
1394   return true;
1395 }
1396
1397 /* If we can prove that TOP = cst * BOT for some constant cst,
1398    store cst to MUL and return true.  Otherwise return false.
1399    The returned value is always sign-extended, regardless of the
1400    signedness of TOP and BOT.  */
1401
1402 static bool
1403 constant_multiple_of (tree top, tree bot, double_int *mul)
1404 {
1405   tree mby;
1406   enum tree_code code;
1407   double_int res, p0, p1;
1408   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1409
1410   STRIP_NOPS (top);
1411   STRIP_NOPS (bot);
1412
1413   if (operand_equal_p (top, bot, 0))
1414     {
1415       *mul = double_int_one;
1416       return true;
1417     }
1418
1419   code = TREE_CODE (top);
1420   switch (code)
1421     {
1422     case MULT_EXPR:
1423       mby = TREE_OPERAND (top, 1);
1424       if (TREE_CODE (mby) != INTEGER_CST)
1425         return false;
1426
1427       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1428         return false;
1429
1430       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1431                               precision);
1432       return true;
1433
1434     case PLUS_EXPR:
1435     case MINUS_EXPR:
1436       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1437           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1438         return false;
1439
1440       if (code == MINUS_EXPR)
1441         p1 = double_int_neg (p1);
1442       *mul = double_int_sext (double_int_add (p0, p1), precision);
1443       return true;
1444
1445     case INTEGER_CST:
1446       if (TREE_CODE (bot) != INTEGER_CST)
1447         return false;
1448
1449       p0 = double_int_sext (tree_to_double_int (top), precision);
1450       p1 = double_int_sext (tree_to_double_int (bot), precision);
1451       if (double_int_zero_p (p1))
1452         return false;
1453       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1454                               precision);
1455       return double_int_zero_p (res);
1456
1457     default:
1458       return false;
1459     }
1460 }
1461
1462 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1463
1464 static bool
1465 may_be_unaligned_p (tree ref, tree step)
1466 {
1467   tree base;
1468   tree base_type;
1469   HOST_WIDE_INT bitsize;
1470   HOST_WIDE_INT bitpos;
1471   tree toffset;
1472   enum machine_mode mode;
1473   int unsignedp, volatilep;
1474   unsigned base_align;
1475
1476   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1477      thus they are not misaligned.  */
1478   if (TREE_CODE (ref) == TARGET_MEM_REF)
1479     return false;
1480
1481   /* The test below is basically copy of what expr.c:normal_inner_ref
1482      does to check whether the object must be loaded by parts when
1483      STRICT_ALIGNMENT is true.  */
1484   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1485                               &unsignedp, &volatilep, true);
1486   base_type = TREE_TYPE (base);
1487   base_align = TYPE_ALIGN (base_type);
1488
1489   if (mode != BLKmode)
1490     {
1491       double_int mul;
1492       tree al = build_int_cst (TREE_TYPE (step),
1493                                GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1494
1495       if (base_align < GET_MODE_ALIGNMENT (mode)
1496           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1497           || bitpos % BITS_PER_UNIT != 0)
1498         return true;
1499
1500       if (!constant_multiple_of (step, al, &mul))
1501         return true;
1502     }
1503
1504   return false;
1505 }
1506
1507 /* Return true if EXPR may be non-addressable.   */
1508
1509 static bool
1510 may_be_nonaddressable_p (tree expr)
1511 {
1512   switch (TREE_CODE (expr))
1513     {
1514     case TARGET_MEM_REF:
1515       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1516          target, thus they are always addressable.  */
1517       return false;
1518
1519     case COMPONENT_REF:
1520       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1521              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1522
1523     case VIEW_CONVERT_EXPR:
1524       /* This kind of view-conversions may wrap non-addressable objects
1525          and make them look addressable.  After some processing the
1526          non-addressability may be uncovered again, causing ADDR_EXPRs
1527          of inappropriate objects to be built.  */
1528       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1529           || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
1530         return true;
1531
1532       /* ... fall through ... */
1533
1534     case ARRAY_REF:
1535     case ARRAY_RANGE_REF:
1536       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1537
1538     case CONVERT_EXPR:
1539     case NON_LVALUE_EXPR:
1540     case NOP_EXPR:
1541       return true;
1542
1543     default:
1544       break;
1545     }
1546
1547   return false;
1548 }
1549
1550 /* Finds addresses in *OP_P inside STMT.  */
1551
1552 static void
1553 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1554 {
1555   tree base = *op_p, step = build_int_cst (sizetype, 0);
1556   struct iv *civ;
1557   struct ifs_ivopts_data ifs_ivopts_data;
1558
1559   /* Do not play with volatile memory references.  A bit too conservative,
1560      perhaps, but safe.  */
1561   if (stmt_ann (stmt)->has_volatile_ops)
1562     goto fail;
1563
1564   /* Ignore bitfields for now.  Not really something terribly complicated
1565      to handle.  TODO.  */
1566   if (TREE_CODE (base) == BIT_FIELD_REF)
1567     goto fail;
1568
1569   base = unshare_expr (base);
1570
1571   if (TREE_CODE (base) == TARGET_MEM_REF)
1572     {
1573       tree type = build_pointer_type (TREE_TYPE (base));
1574       tree astep;
1575
1576       if (TMR_BASE (base)
1577           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1578         {
1579           civ = get_iv (data, TMR_BASE (base));
1580           if (!civ)
1581             goto fail;
1582
1583           TMR_BASE (base) = civ->base;
1584           step = civ->step;
1585         }
1586       if (TMR_INDEX (base)
1587           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1588         {
1589           civ = get_iv (data, TMR_INDEX (base));
1590           if (!civ)
1591             goto fail;
1592
1593           TMR_INDEX (base) = civ->base;
1594           astep = civ->step;
1595
1596           if (astep)
1597             {
1598               if (TMR_STEP (base))
1599                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1600
1601               step = fold_build2 (PLUS_EXPR, type, step, astep);
1602             }
1603         }
1604
1605       if (integer_zerop (step))
1606         goto fail;
1607       base = tree_mem_ref_addr (type, base);
1608     }
1609   else
1610     {
1611       ifs_ivopts_data.ivopts_data = data;
1612       ifs_ivopts_data.stmt = stmt;
1613       ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1614       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1615           || integer_zerop (ifs_ivopts_data.step))
1616         goto fail;
1617       step = ifs_ivopts_data.step;
1618
1619       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1620       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1621
1622       /* Check that the base expression is addressable.  This needs
1623          to be done after substituting bases of IVs into it.  */
1624       if (may_be_nonaddressable_p (base))
1625         goto fail;
1626
1627       /* Moreover, on strict alignment platforms, check that it is
1628          sufficiently aligned.  */
1629       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1630         goto fail;
1631
1632       base = build_fold_addr_expr (base);
1633
1634       /* Substituting bases of IVs into the base expression might
1635          have caused folding opportunities.  */
1636       if (TREE_CODE (base) == ADDR_EXPR)
1637         {
1638           tree *ref = &TREE_OPERAND (base, 0);
1639           while (handled_component_p (*ref))
1640             ref = &TREE_OPERAND (*ref, 0);
1641           if (TREE_CODE (*ref) == INDIRECT_REF)
1642             *ref = fold_indirect_ref (*ref);
1643         }
1644     }
1645
1646   civ = alloc_iv (base, step);
1647   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1648   return;
1649
1650 fail:
1651   for_each_index (op_p, idx_record_use, data);
1652 }
1653
1654 /* Finds and records invariants used in STMT.  */
1655
1656 static void
1657 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1658 {
1659   ssa_op_iter iter;
1660   use_operand_p use_p;
1661   tree op;
1662
1663   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1664     {
1665       op = USE_FROM_PTR (use_p);
1666       record_invariant (data, op, false);
1667     }
1668 }
1669
1670 /* Finds interesting uses of induction variables in the statement STMT.  */
1671
1672 static void
1673 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1674 {
1675   struct iv *iv;
1676   tree op, lhs, rhs;
1677   ssa_op_iter iter;
1678   use_operand_p use_p;
1679
1680   find_invariants_stmt (data, stmt);
1681
1682   if (TREE_CODE (stmt) == COND_EXPR)
1683     {
1684       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1685       return;
1686     }
1687
1688   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1689     {
1690       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1691       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1692
1693       if (TREE_CODE (lhs) == SSA_NAME)
1694         {
1695           /* If the statement defines an induction variable, the uses are not
1696              interesting by themselves.  */
1697
1698           iv = get_iv (data, lhs);
1699
1700           if (iv && !integer_zerop (iv->step))
1701             return;
1702         }
1703
1704       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1705         {
1706         case tcc_comparison:
1707           find_interesting_uses_cond (data, stmt,
1708                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1709           return;
1710
1711         case tcc_reference:
1712           find_interesting_uses_address (data, stmt,
1713                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1714           if (REFERENCE_CLASS_P (lhs))
1715             find_interesting_uses_address (data, stmt,
1716                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1717           return;
1718
1719         default: ;
1720         }
1721
1722       if (REFERENCE_CLASS_P (lhs)
1723           && is_gimple_val (rhs))
1724         {
1725           find_interesting_uses_address (data, stmt,
1726                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1727           find_interesting_uses_op (data, rhs);
1728           return;
1729         }
1730
1731       /* TODO -- we should also handle address uses of type
1732
1733          memory = call (whatever);
1734
1735          and
1736
1737          call (memory).  */
1738     }
1739
1740   if (TREE_CODE (stmt) == PHI_NODE
1741       && bb_for_stmt (stmt) == data->current_loop->header)
1742     {
1743       lhs = PHI_RESULT (stmt);
1744       iv = get_iv (data, lhs);
1745
1746       if (iv && !integer_zerop (iv->step))
1747         return;
1748     }
1749
1750   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1751     {
1752       op = USE_FROM_PTR (use_p);
1753
1754       if (TREE_CODE (op) != SSA_NAME)
1755         continue;
1756
1757       iv = get_iv (data, op);
1758       if (!iv)
1759         continue;
1760
1761       find_interesting_uses_op (data, op);
1762     }
1763 }
1764
1765 /* Finds interesting uses of induction variables outside of loops
1766    on loop exit edge EXIT.  */
1767
1768 static void
1769 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1770 {
1771   tree phi, def;
1772
1773   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1774     {
1775       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1776       if (is_gimple_reg (def))
1777         find_interesting_uses_op (data, def);
1778     }
1779 }
1780
1781 /* Finds uses of the induction variables that are interesting.  */
1782
1783 static void
1784 find_interesting_uses (struct ivopts_data *data)
1785 {
1786   basic_block bb;
1787   block_stmt_iterator bsi;
1788   tree phi;
1789   basic_block *body = get_loop_body (data->current_loop);
1790   unsigned i;
1791   struct version_info *info;
1792   edge e;
1793
1794   if (dump_file && (dump_flags & TDF_DETAILS))
1795     fprintf (dump_file, "Uses:\n\n");
1796
1797   for (i = 0; i < data->current_loop->num_nodes; i++)
1798     {
1799       edge_iterator ei;
1800       bb = body[i];
1801
1802       FOR_EACH_EDGE (e, ei, bb->succs)
1803         if (e->dest != EXIT_BLOCK_PTR
1804             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1805           find_interesting_uses_outside (data, e);
1806
1807       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1808         find_interesting_uses_stmt (data, phi);
1809       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1810         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1811     }
1812
1813   if (dump_file && (dump_flags & TDF_DETAILS))
1814     {
1815       bitmap_iterator bi;
1816
1817       fprintf (dump_file, "\n");
1818
1819       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1820         {
1821           info = ver_info (data, i);
1822           if (info->inv_id)
1823             {
1824               fprintf (dump_file, "  ");
1825               print_generic_expr (dump_file, info->name, TDF_SLIM);
1826               fprintf (dump_file, " is invariant (%d)%s\n",
1827                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1828             }
1829         }
1830
1831       fprintf (dump_file, "\n");
1832     }
1833
1834   free (body);
1835 }
1836
1837 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1838    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1839    we are at the top-level of the processed address.  */
1840
1841 static tree
1842 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1843                 unsigned HOST_WIDE_INT *offset)
1844 {
1845   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1846   enum tree_code code;
1847   tree type, orig_type = TREE_TYPE (expr);
1848   unsigned HOST_WIDE_INT off0, off1, st;
1849   tree orig_expr = expr;
1850
1851   STRIP_NOPS (expr);
1852
1853   type = TREE_TYPE (expr);
1854   code = TREE_CODE (expr);
1855   *offset = 0;
1856
1857   switch (code)
1858     {
1859     case INTEGER_CST:
1860       if (!cst_and_fits_in_hwi (expr)
1861           || integer_zerop (expr))
1862         return orig_expr;
1863
1864       *offset = int_cst_value (expr);
1865       return build_int_cst (orig_type, 0);
1866
1867     case POINTER_PLUS_EXPR:
1868     case PLUS_EXPR:
1869     case MINUS_EXPR:
1870       op0 = TREE_OPERAND (expr, 0);
1871       op1 = TREE_OPERAND (expr, 1);
1872
1873       op0 = strip_offset_1 (op0, false, false, &off0);
1874       op1 = strip_offset_1 (op1, false, false, &off1);
1875
1876       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1877       if (op0 == TREE_OPERAND (expr, 0)
1878           && op1 == TREE_OPERAND (expr, 1))
1879         return orig_expr;
1880
1881       if (integer_zerop (op1))
1882         expr = op0;
1883       else if (integer_zerop (op0))
1884         {
1885           if (code == MINUS_EXPR)
1886             expr = fold_build1 (NEGATE_EXPR, type, op1);
1887           else
1888             expr = op1;
1889         }
1890       else
1891         expr = fold_build2 (code, type, op0, op1);
1892
1893       return fold_convert (orig_type, expr);
1894
1895     case ARRAY_REF:
1896       if (!inside_addr)
1897         return orig_expr;
1898
1899       step = array_ref_element_size (expr);
1900       if (!cst_and_fits_in_hwi (step))
1901         break;
1902
1903       st = int_cst_value (step);
1904       op1 = TREE_OPERAND (expr, 1);
1905       op1 = strip_offset_1 (op1, false, false, &off1);
1906       *offset = off1 * st;
1907
1908       if (top_compref
1909           && integer_zerop (op1))
1910         {
1911           /* Strip the component reference completely.  */
1912           op0 = TREE_OPERAND (expr, 0);
1913           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1914           *offset += off0;
1915           return op0;
1916         }
1917       break;
1918
1919     case COMPONENT_REF:
1920       if (!inside_addr)
1921         return orig_expr;
1922
1923       tmp = component_ref_field_offset (expr);
1924       if (top_compref
1925           && cst_and_fits_in_hwi (tmp))
1926         {
1927           /* Strip the component reference completely.  */
1928           op0 = TREE_OPERAND (expr, 0);
1929           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1930           *offset = off0 + int_cst_value (tmp);
1931           return op0;
1932         }
1933       break;
1934
1935     case ADDR_EXPR:
1936       op0 = TREE_OPERAND (expr, 0);
1937       op0 = strip_offset_1 (op0, true, true, &off0);
1938       *offset += off0;
1939
1940       if (op0 == TREE_OPERAND (expr, 0))
1941         return orig_expr;
1942
1943       expr = build_fold_addr_expr (op0);
1944       return fold_convert (orig_type, expr);
1945
1946     case INDIRECT_REF:
1947       inside_addr = false;
1948       break;
1949
1950     default:
1951       return orig_expr;
1952     }
1953
1954   /* Default handling of expressions for that we want to recurse into
1955      the first operand.  */
1956   op0 = TREE_OPERAND (expr, 0);
1957   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1958   *offset += off0;
1959
1960   if (op0 == TREE_OPERAND (expr, 0)
1961       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1962     return orig_expr;
1963
1964   expr = copy_node (expr);
1965   TREE_OPERAND (expr, 0) = op0;
1966   if (op1)
1967     TREE_OPERAND (expr, 1) = op1;
1968
1969   /* Inside address, we might strip the top level component references,
1970      thus changing type of the expression.  Handling of ADDR_EXPR
1971      will fix that.  */
1972   expr = fold_convert (orig_type, expr);
1973
1974   return expr;
1975 }
1976
1977 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1978
1979 static tree
1980 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1981 {
1982   return strip_offset_1 (expr, false, false, offset);
1983 }
1984
1985 /* Returns variant of TYPE that can be used as base for different uses.
1986    We return unsigned type with the same precision, which avoids problems
1987    with overflows.  */
1988
1989 static tree
1990 generic_type_for (tree type)
1991 {
1992   if (POINTER_TYPE_P (type))
1993     return unsigned_type_for (type);
1994
1995   if (TYPE_UNSIGNED (type))
1996     return type;
1997
1998   return unsigned_type_for (type);
1999 }
2000
2001 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2002    the bitmap to that we should store it.  */
2003
2004 static struct ivopts_data *fd_ivopts_data;
2005 static tree
2006 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2007 {
2008   bitmap *depends_on = (bitmap *) data;
2009   struct version_info *info;
2010
2011   if (TREE_CODE (*expr_p) != SSA_NAME)
2012     return NULL_TREE;
2013   info = name_info (fd_ivopts_data, *expr_p);
2014
2015   if (!info->inv_id || info->has_nonlin_use)
2016     return NULL_TREE;
2017
2018   if (!*depends_on)
2019     *depends_on = BITMAP_ALLOC (NULL);
2020   bitmap_set_bit (*depends_on, info->inv_id);
2021
2022   return NULL_TREE;
2023 }
2024
2025 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2026    position to POS.  If USE is not NULL, the candidate is set as related to
2027    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2028    replacement of the final value of the iv by a direct computation.  */
2029
2030 static struct iv_cand *
2031 add_candidate_1 (struct ivopts_data *data,
2032                  tree base, tree step, bool important, enum iv_position pos,
2033                  struct iv_use *use, tree incremented_at)
2034 {
2035   unsigned i;
2036   struct iv_cand *cand = NULL;
2037   tree type, orig_type;
2038
2039   if (base)
2040     {
2041       orig_type = TREE_TYPE (base);
2042       type = generic_type_for (orig_type);
2043       if (type != orig_type)
2044         {
2045           base = fold_convert (type, base);
2046           step = fold_convert (type, step);
2047         }
2048     }
2049
2050   for (i = 0; i < n_iv_cands (data); i++)
2051     {
2052       cand = iv_cand (data, i);
2053
2054       if (cand->pos != pos)
2055         continue;
2056
2057       if (cand->incremented_at != incremented_at)
2058         continue;
2059
2060       if (!cand->iv)
2061         {
2062           if (!base && !step)
2063             break;
2064
2065           continue;
2066         }
2067
2068       if (!base && !step)
2069         continue;
2070
2071       if (operand_equal_p (base, cand->iv->base, 0)
2072           && operand_equal_p (step, cand->iv->step, 0))
2073         break;
2074     }
2075
2076   if (i == n_iv_cands (data))
2077     {
2078       cand = XCNEW (struct iv_cand);
2079       cand->id = i;
2080
2081       if (!base && !step)
2082         cand->iv = NULL;
2083       else
2084         cand->iv = alloc_iv (base, step);
2085
2086       cand->pos = pos;
2087       if (pos != IP_ORIGINAL && cand->iv)
2088         {
2089           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2090           cand->var_after = cand->var_before;
2091         }
2092       cand->important = important;
2093       cand->incremented_at = incremented_at;
2094       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2095
2096       if (step
2097           && TREE_CODE (step) != INTEGER_CST)
2098         {
2099           fd_ivopts_data = data;
2100           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2101         }
2102
2103       if (dump_file && (dump_flags & TDF_DETAILS))
2104         dump_cand (dump_file, cand);
2105     }
2106
2107   if (important && !cand->important)
2108     {
2109       cand->important = true;
2110       if (dump_file && (dump_flags & TDF_DETAILS))
2111         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2112     }
2113
2114   if (use)
2115     {
2116       bitmap_set_bit (use->related_cands, i);
2117       if (dump_file && (dump_flags & TDF_DETAILS))
2118         fprintf (dump_file, "Candidate %d is related to use %d\n",
2119                  cand->id, use->id);
2120     }
2121
2122   return cand;
2123 }
2124
2125 /* Returns true if incrementing the induction variable at the end of the LOOP
2126    is allowed.
2127
2128    The purpose is to avoid splitting latch edge with a biv increment, thus
2129    creating a jump, possibly confusing other optimization passes and leaving
2130    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2131    is not available (so we do not have a better alternative), or if the latch
2132    edge is already nonempty.  */
2133
2134 static bool
2135 allow_ip_end_pos_p (struct loop *loop)
2136 {
2137   if (!ip_normal_pos (loop))
2138     return true;
2139
2140   if (!empty_block_p (ip_end_pos (loop)))
2141     return true;
2142
2143   return false;
2144 }
2145
2146 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2147    position to POS.  If USE is not NULL, the candidate is set as related to
2148    it.  The candidate computation is scheduled on all available positions.  */
2149
2150 static void
2151 add_candidate (struct ivopts_data *data,
2152                tree base, tree step, bool important, struct iv_use *use)
2153 {
2154   if (ip_normal_pos (data->current_loop))
2155     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2156   if (ip_end_pos (data->current_loop)
2157       && allow_ip_end_pos_p (data->current_loop))
2158     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2159 }
2160
2161 /* Add a standard "0 + 1 * iteration" iv candidate for a
2162    type with SIZE bits.  */
2163
2164 static void
2165 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2166                                      unsigned int size)
2167 {
2168   tree type = lang_hooks.types.type_for_size (size, true);
2169   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2170                  true, NULL);
2171 }
2172
2173 /* Adds standard iv candidates.  */
2174
2175 static void
2176 add_standard_iv_candidates (struct ivopts_data *data)
2177 {
2178   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2179
2180   /* The same for a double-integer type if it is still fast enough.  */
2181   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2182     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2183 }
2184
2185
2186 /* Adds candidates bases on the old induction variable IV.  */
2187
2188 static void
2189 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2190 {
2191   tree phi, def;
2192   struct iv_cand *cand;
2193
2194   add_candidate (data, iv->base, iv->step, true, NULL);
2195
2196   /* The same, but with initial value zero.  */
2197   add_candidate (data,
2198                  build_int_cst (TREE_TYPE (iv->base), 0),
2199                  iv->step, true, NULL);
2200
2201   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2202   if (TREE_CODE (phi) == PHI_NODE)
2203     {
2204       /* Additionally record the possibility of leaving the original iv
2205          untouched.  */
2206       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2207       cand = add_candidate_1 (data,
2208                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2209                               SSA_NAME_DEF_STMT (def));
2210       cand->var_before = iv->ssa_name;
2211       cand->var_after = def;
2212     }
2213 }
2214
2215 /* Adds candidates based on the old induction variables.  */
2216
2217 static void
2218 add_old_ivs_candidates (struct ivopts_data *data)
2219 {
2220   unsigned i;
2221   struct iv *iv;
2222   bitmap_iterator bi;
2223
2224   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2225     {
2226       iv = ver_info (data, i)->iv;
2227       if (iv && iv->biv_p && !integer_zerop (iv->step))
2228         add_old_iv_candidates (data, iv);
2229     }
2230 }
2231
2232 /* Adds candidates based on the value of the induction variable IV and USE.  */
2233
2234 static void
2235 add_iv_value_candidates (struct ivopts_data *data,
2236                          struct iv *iv, struct iv_use *use)
2237 {
2238   unsigned HOST_WIDE_INT offset;
2239   tree base;
2240
2241   add_candidate (data, iv->base, iv->step, false, use);
2242
2243   /* The same, but with initial value zero.  Make such variable important,
2244      since it is generic enough so that possibly many uses may be based
2245      on it.  */
2246   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2247                  iv->step, true, use);
2248
2249   /* Third, try removing the constant offset.  */
2250   base = strip_offset (iv->base, &offset);
2251   if (offset)
2252     add_candidate (data, base, iv->step, false, use);
2253 }
2254
2255 /* Adds candidates based on the uses.  */
2256
2257 static void
2258 add_derived_ivs_candidates (struct ivopts_data *data)
2259 {
2260   unsigned i;
2261
2262   for (i = 0; i < n_iv_uses (data); i++)
2263     {
2264       struct iv_use *use = iv_use (data, i);
2265
2266       if (!use)
2267         continue;
2268
2269       switch (use->type)
2270         {
2271         case USE_NONLINEAR_EXPR:
2272         case USE_COMPARE:
2273         case USE_ADDRESS:
2274           /* Just add the ivs based on the value of the iv used here.  */
2275           add_iv_value_candidates (data, use->iv, use);
2276           break;
2277
2278         default:
2279           gcc_unreachable ();
2280         }
2281     }
2282 }
2283
2284 /* Record important candidates and add them to related_cands bitmaps
2285    if needed.  */
2286
2287 static void
2288 record_important_candidates (struct ivopts_data *data)
2289 {
2290   unsigned i;
2291   struct iv_use *use;
2292
2293   for (i = 0; i < n_iv_cands (data); i++)
2294     {
2295       struct iv_cand *cand = iv_cand (data, i);
2296
2297       if (cand->important)
2298         bitmap_set_bit (data->important_candidates, i);
2299     }
2300
2301   data->consider_all_candidates = (n_iv_cands (data)
2302                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2303
2304   if (data->consider_all_candidates)
2305     {
2306       /* We will not need "related_cands" bitmaps in this case,
2307          so release them to decrease peak memory consumption.  */
2308       for (i = 0; i < n_iv_uses (data); i++)
2309         {
2310           use = iv_use (data, i);
2311           BITMAP_FREE (use->related_cands);
2312         }
2313     }
2314   else
2315     {
2316       /* Add important candidates to the related_cands bitmaps.  */
2317       for (i = 0; i < n_iv_uses (data); i++)
2318         bitmap_ior_into (iv_use (data, i)->related_cands,
2319                          data->important_candidates);
2320     }
2321 }
2322
2323 /* Finds the candidates for the induction variables.  */
2324
2325 static void
2326 find_iv_candidates (struct ivopts_data *data)
2327 {
2328   /* Add commonly used ivs.  */
2329   add_standard_iv_candidates (data);
2330
2331   /* Add old induction variables.  */
2332   add_old_ivs_candidates (data);
2333
2334   /* Add induction variables derived from uses.  */
2335   add_derived_ivs_candidates (data);
2336
2337   /* Record the important candidates.  */
2338   record_important_candidates (data);
2339 }
2340
2341 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2342    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2343    we allocate a simple list to every use.  */
2344
2345 static void
2346 alloc_use_cost_map (struct ivopts_data *data)
2347 {
2348   unsigned i, size, s, j;
2349
2350   for (i = 0; i < n_iv_uses (data); i++)
2351     {
2352       struct iv_use *use = iv_use (data, i);
2353       bitmap_iterator bi;
2354
2355       if (data->consider_all_candidates)
2356         size = n_iv_cands (data);
2357       else
2358         {
2359           s = 0;
2360           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2361             {
2362               s++;
2363             }
2364
2365           /* Round up to the power of two, so that moduling by it is fast.  */
2366           for (size = 1; size < s; size <<= 1)
2367             continue;
2368         }
2369
2370       use->n_map_members = size;
2371       use->cost_map = XCNEWVEC (struct cost_pair, size);
2372     }
2373 }
2374
2375 /* Returns description of computation cost of expression whose runtime
2376    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2377
2378 static comp_cost
2379 new_cost (unsigned runtime, unsigned complexity)
2380 {
2381   comp_cost cost;
2382
2383   cost.cost = runtime;
2384   cost.complexity = complexity;
2385
2386   return cost;
2387 }
2388
2389 /* Adds costs COST1 and COST2.  */
2390
2391 static comp_cost
2392 add_costs (comp_cost cost1, comp_cost cost2)
2393 {
2394   cost1.cost += cost2.cost;
2395   cost1.complexity += cost2.complexity;
2396
2397   return cost1;
2398 }
2399 /* Subtracts costs COST1 and COST2.  */
2400
2401 static comp_cost
2402 sub_costs (comp_cost cost1, comp_cost cost2)
2403 {
2404   cost1.cost -= cost2.cost;
2405   cost1.complexity -= cost2.complexity;
2406
2407   return cost1;
2408 }
2409
2410 /* Returns a negative number if COST1 < COST2, a positive number if
2411    COST1 > COST2, and 0 if COST1 = COST2.  */
2412
2413 static int
2414 compare_costs (comp_cost cost1, comp_cost cost2)
2415 {
2416   if (cost1.cost == cost2.cost)
2417     return cost1.complexity - cost2.complexity;
2418
2419   return cost1.cost - cost2.cost;
2420 }
2421
2422 /* Returns true if COST is infinite.  */
2423
2424 static bool
2425 infinite_cost_p (comp_cost cost)
2426 {
2427   return cost.cost == INFTY;
2428 }
2429
2430 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2431    on invariants DEPENDS_ON and that the value used in expressing it
2432    is VALUE.*/
2433
2434 static void
2435 set_use_iv_cost (struct ivopts_data *data,
2436                  struct iv_use *use, struct iv_cand *cand,
2437                  comp_cost cost, bitmap depends_on, tree value)
2438 {
2439   unsigned i, s;
2440
2441   if (infinite_cost_p (cost))
2442     {
2443       BITMAP_FREE (depends_on);
2444       return;
2445     }
2446
2447   if (data->consider_all_candidates)
2448     {
2449       use->cost_map[cand->id].cand = cand;
2450       use->cost_map[cand->id].cost = cost;
2451       use->cost_map[cand->id].depends_on = depends_on;
2452       use->cost_map[cand->id].value = value;
2453       return;
2454     }
2455
2456   /* n_map_members is a power of two, so this computes modulo.  */
2457   s = cand->id & (use->n_map_members - 1);
2458   for (i = s; i < use->n_map_members; i++)
2459     if (!use->cost_map[i].cand)
2460       goto found;
2461   for (i = 0; i < s; i++)
2462     if (!use->cost_map[i].cand)
2463       goto found;
2464
2465   gcc_unreachable ();
2466
2467 found:
2468   use->cost_map[i].cand = cand;
2469   use->cost_map[i].cost = cost;
2470   use->cost_map[i].depends_on = depends_on;
2471   use->cost_map[i].value = value;
2472 }
2473
2474 /* Gets cost of (USE, CANDIDATE) pair.  */
2475
2476 static struct cost_pair *
2477 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2478                  struct iv_cand *cand)
2479 {
2480   unsigned i, s;
2481   struct cost_pair *ret;
2482
2483   if (!cand)
2484     return NULL;
2485
2486   if (data->consider_all_candidates)
2487     {
2488       ret = use->cost_map + cand->id;
2489       if (!ret->cand)
2490         return NULL;
2491
2492       return ret;
2493     }
2494
2495   /* n_map_members is a power of two, so this computes modulo.  */
2496   s = cand->id & (use->n_map_members - 1);
2497   for (i = s; i < use->n_map_members; i++)
2498     if (use->cost_map[i].cand == cand)
2499       return use->cost_map + i;
2500
2501   for (i = 0; i < s; i++)
2502     if (use->cost_map[i].cand == cand)
2503       return use->cost_map + i;
2504
2505   return NULL;
2506 }
2507
2508 /* Returns estimate on cost of computing SEQ.  */
2509
2510 static unsigned
2511 seq_cost (rtx seq)
2512 {
2513   unsigned cost = 0;
2514   rtx set;
2515
2516   for (; seq; seq = NEXT_INSN (seq))
2517     {
2518       set = single_set (seq);
2519       if (set)
2520         cost += rtx_cost (set, SET);
2521       else
2522         cost++;
2523     }
2524
2525   return cost;
2526 }
2527
2528 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2529 static rtx
2530 produce_memory_decl_rtl (tree obj, int *regno)
2531 {
2532   rtx x;
2533
2534   gcc_assert (obj);
2535   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2536     {
2537       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2538       x = gen_rtx_SYMBOL_REF (Pmode, name);
2539       SET_SYMBOL_REF_DECL (x, obj);
2540       x = gen_rtx_MEM (DECL_MODE (obj), x);
2541       targetm.encode_section_info (obj, x, true);
2542     }
2543   else
2544     {
2545       x = gen_raw_REG (Pmode, (*regno)++);
2546       x = gen_rtx_MEM (DECL_MODE (obj), x);
2547     }
2548
2549   return x;
2550 }
2551
2552 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2553    walk_tree.  DATA contains the actual fake register number.  */
2554
2555 static tree
2556 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2557 {
2558   tree obj = NULL_TREE;
2559   rtx x = NULL_RTX;
2560   int *regno = (int *) data;
2561
2562   switch (TREE_CODE (*expr_p))
2563     {
2564     case ADDR_EXPR:
2565       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2566            handled_component_p (*expr_p);
2567            expr_p = &TREE_OPERAND (*expr_p, 0))
2568         continue;
2569       obj = *expr_p;
2570       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2571         x = produce_memory_decl_rtl (obj, regno);
2572       break;
2573
2574     case SSA_NAME:
2575       *ws = 0;
2576       obj = SSA_NAME_VAR (*expr_p);
2577       if (!DECL_RTL_SET_P (obj))
2578         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2579       break;
2580
2581     case VAR_DECL:
2582     case PARM_DECL:
2583     case RESULT_DECL:
2584       *ws = 0;
2585       obj = *expr_p;
2586
2587       if (DECL_RTL_SET_P (obj))
2588         break;
2589
2590       if (DECL_MODE (obj) == BLKmode)
2591         x = produce_memory_decl_rtl (obj, regno);
2592       else
2593         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2594
2595       break;
2596
2597     default:
2598       break;
2599     }
2600
2601   if (x)
2602     {
2603       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2604       SET_DECL_RTL (obj, x);
2605     }
2606
2607   return NULL_TREE;
2608 }
2609
2610 /* Determines cost of the computation of EXPR.  */
2611
2612 static unsigned
2613 computation_cost (tree expr)
2614 {
2615   rtx seq, rslt;
2616   tree type = TREE_TYPE (expr);
2617   unsigned cost;
2618   /* Avoid using hard regs in ways which may be unsupported.  */
2619   int regno = LAST_VIRTUAL_REGISTER + 1;
2620
2621   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2622   start_sequence ();
2623   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2624   seq = get_insns ();
2625   end_sequence ();
2626
2627   cost = seq_cost (seq);
2628   if (MEM_P (rslt))
2629     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2630
2631   return cost;
2632 }
2633
2634 /* Returns variable containing the value of candidate CAND at statement AT.  */
2635
2636 static tree
2637 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2638 {
2639   if (stmt_after_increment (loop, cand, stmt))
2640     return cand->var_after;
2641   else
2642     return cand->var_before;
2643 }
2644
2645 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2646    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2647
2648 int
2649 tree_int_cst_sign_bit (const_tree t)
2650 {
2651   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2652   unsigned HOST_WIDE_INT w;
2653
2654   if (bitno < HOST_BITS_PER_WIDE_INT)
2655     w = TREE_INT_CST_LOW (t);
2656   else
2657     {
2658       w = TREE_INT_CST_HIGH (t);
2659       bitno -= HOST_BITS_PER_WIDE_INT;
2660     }
2661
2662   return (w >> bitno) & 1;
2663 }
2664
2665 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2666    same precision that is at least as wide as the precision of TYPE, stores
2667    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2668    type of A and B.  */
2669
2670 static tree
2671 determine_common_wider_type (tree *a, tree *b)
2672 {
2673   tree wider_type = NULL;
2674   tree suba, subb;
2675   tree atype = TREE_TYPE (*a);
2676
2677   if ((TREE_CODE (*a) == NOP_EXPR
2678        || TREE_CODE (*a) == CONVERT_EXPR))
2679     {
2680       suba = TREE_OPERAND (*a, 0);
2681       wider_type = TREE_TYPE (suba);
2682       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2683         return atype;
2684     }
2685   else
2686     return atype;
2687
2688   if ((TREE_CODE (*b) == NOP_EXPR
2689        || TREE_CODE (*b) == CONVERT_EXPR))
2690     {
2691       subb = TREE_OPERAND (*b, 0);
2692       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2693         return atype;
2694     }
2695   else
2696     return atype;
2697
2698   *a = suba;
2699   *b = subb;
2700   return wider_type;
2701 }
2702
2703 /* Determines the expression by that USE is expressed from induction variable
2704    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2705    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2706
2707 static bool
2708 get_computation_aff (struct loop *loop,
2709                      struct iv_use *use, struct iv_cand *cand, tree at,
2710                      struct affine_tree_combination *aff)
2711 {
2712   tree ubase = use->iv->base;
2713   tree ustep = use->iv->step;
2714   tree cbase = cand->iv->base;
2715   tree cstep = cand->iv->step, cstep_common;
2716   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2717   tree common_type, var;
2718   tree uutype;
2719   aff_tree cbase_aff, var_aff;
2720   double_int rat;
2721
2722   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2723     {
2724       /* We do not have a precision to express the values of use.  */
2725       return false;
2726     }
2727
2728   var = var_at_stmt (loop, cand, at);
2729   uutype = unsigned_type_for (utype);
2730
2731   /* If the conversion is not noop, perform it.  */
2732   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2733     {
2734       cstep = fold_convert (uutype, cstep);
2735       cbase = fold_convert (uutype, cbase);
2736       var = fold_convert (uutype, var);
2737     }
2738
2739   if (!constant_multiple_of (ustep, cstep, &rat))
2740     return false;
2741
2742   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2743      type, we achieve better folding by computing their difference in this
2744      wider type, and cast the result to UUTYPE.  We do not need to worry about
2745      overflows, as all the arithmetics will in the end be performed in UUTYPE
2746      anyway.  */
2747   common_type = determine_common_wider_type (&ubase, &cbase);
2748
2749   /* use = ubase - ratio * cbase + ratio * var.  */
2750   tree_to_aff_combination (ubase, common_type, aff);
2751   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2752   tree_to_aff_combination (var, uutype, &var_aff);
2753
2754   /* We need to shift the value if we are after the increment.  */
2755   if (stmt_after_increment (loop, cand, at))
2756     {
2757       aff_tree cstep_aff;
2758
2759       if (common_type != uutype)
2760         cstep_common = fold_convert (common_type, cstep);
2761       else
2762         cstep_common = cstep;
2763
2764       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2765       aff_combination_add (&cbase_aff, &cstep_aff);
2766     }
2767
2768   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2769   aff_combination_add (aff, &cbase_aff);
2770   if (common_type != uutype)
2771     aff_combination_convert (aff, uutype);
2772
2773   aff_combination_scale (&var_aff, rat);
2774   aff_combination_add (aff, &var_aff);
2775
2776   return true;
2777 }
2778
2779 /* Determines the expression by that USE is expressed from induction variable
2780    CAND at statement AT in LOOP.  The computation is unshared.  */
2781
2782 static tree
2783 get_computation_at (struct loop *loop,
2784                     struct iv_use *use, struct iv_cand *cand, tree at)
2785 {
2786   aff_tree aff;
2787   tree type = TREE_TYPE (use->iv->base);
2788
2789   if (!get_computation_aff (loop, use, cand, at, &aff))
2790     return NULL_TREE;
2791   unshare_aff_combination (&aff);
2792   return fold_convert (type, aff_combination_to_tree (&aff));
2793 }
2794
2795 /* Determines the expression by that USE is expressed from induction variable
2796    CAND in LOOP.  The computation is unshared.  */
2797
2798 static tree
2799 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2800 {
2801   return get_computation_at (loop, use, cand, use->stmt);
2802 }
2803
2804 /* Returns cost of addition in MODE.  */
2805
2806 static unsigned
2807 add_cost (enum machine_mode mode)
2808 {
2809   static unsigned costs[NUM_MACHINE_MODES];
2810   rtx seq;
2811   unsigned cost;
2812
2813   if (costs[mode])
2814     return costs[mode];
2815
2816   start_sequence ();
2817   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2818                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2819                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2820                  NULL_RTX);
2821   seq = get_insns ();
2822   end_sequence ();
2823
2824   cost = seq_cost (seq);
2825   if (!cost)
2826     cost = 1;
2827
2828   costs[mode] = cost;
2829
2830   if (dump_file && (dump_flags & TDF_DETAILS))
2831     fprintf (dump_file, "Addition in %s costs %d\n",
2832              GET_MODE_NAME (mode), cost);
2833   return cost;
2834 }
2835
2836 /* Entry in a hashtable of already known costs for multiplication.  */
2837 struct mbc_entry
2838 {
2839   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2840   enum machine_mode mode;       /* In mode.  */
2841   unsigned cost;                /* The cost.  */
2842 };
2843
2844 /* Counts hash value for the ENTRY.  */
2845
2846 static hashval_t
2847 mbc_entry_hash (const void *entry)
2848 {
2849   const struct mbc_entry *e = (const struct mbc_entry *) entry;
2850
2851   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2852 }
2853
2854 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2855
2856 static int
2857 mbc_entry_eq (const void *entry1, const void *entry2)
2858 {
2859   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2860   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2861
2862   return (e1->mode == e2->mode
2863           && e1->cst == e2->cst);
2864 }
2865
2866 /* Returns cost of multiplication by constant CST in MODE.  */
2867
2868 unsigned
2869 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2870 {
2871   static htab_t costs;
2872   struct mbc_entry **cached, act;
2873   rtx seq;
2874   unsigned cost;
2875
2876   if (!costs)
2877     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2878
2879   act.mode = mode;
2880   act.cst = cst;
2881   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2882   if (*cached)
2883     return (*cached)->cost;
2884
2885   *cached = XNEW (struct mbc_entry);
2886   (*cached)->mode = mode;
2887   (*cached)->cst = cst;
2888
2889   start_sequence ();
2890   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2891                gen_int_mode (cst, mode), NULL_RTX, 0);
2892   seq = get_insns ();
2893   end_sequence ();
2894
2895   cost = seq_cost (seq);
2896
2897   if (dump_file && (dump_flags & TDF_DETAILS))
2898     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2899              (int) cst, GET_MODE_NAME (mode), cost);
2900
2901   (*cached)->cost = cost;
2902
2903   return cost;
2904 }
2905
2906 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2907    validity for a memory reference accessing memory of mode MODE.  */
2908
2909 bool
2910 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2911 {
2912 #define MAX_RATIO 128
2913   static sbitmap valid_mult[MAX_MACHINE_MODE];
2914
2915   if (!valid_mult[mode])
2916     {
2917       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2918       rtx addr;
2919       HOST_WIDE_INT i;
2920
2921       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2922       sbitmap_zero (valid_mult[mode]);
2923       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2924       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2925         {
2926           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2927           if (memory_address_p (mode, addr))
2928             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2929         }
2930
2931       if (dump_file && (dump_flags & TDF_DETAILS))
2932         {
2933           fprintf (dump_file, "  allowed multipliers:");
2934           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2935             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2936               fprintf (dump_file, " %d", (int) i);
2937           fprintf (dump_file, "\n");
2938           fprintf (dump_file, "\n");
2939         }
2940     }
2941
2942   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2943     return false;
2944
2945   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2946 }
2947
2948 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2949    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2950    variable is omitted.  Compute the cost for a memory reference that accesses
2951    a memory location of mode MEM_MODE.
2952
2953    TODO -- there must be some better way.  This all is quite crude.  */
2954
2955 static comp_cost
2956 get_address_cost (bool symbol_present, bool var_present,
2957                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2958                   enum machine_mode mem_mode)
2959 {
2960   static bool initialized[MAX_MACHINE_MODE];
2961   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2962   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2963   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2964   unsigned cost, acost, complexity;
2965   bool offset_p, ratio_p;
2966   HOST_WIDE_INT s_offset;
2967   unsigned HOST_WIDE_INT mask;
2968   unsigned bits;
2969
2970   if (!initialized[mem_mode])
2971     {
2972       HOST_WIDE_INT i;
2973       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2974       int old_cse_not_expected;
2975       unsigned sym_p, var_p, off_p, rat_p, add_c;
2976       rtx seq, addr, base;
2977       rtx reg0, reg1;
2978
2979       initialized[mem_mode] = true;
2980
2981       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2982
2983       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2984       for (i = start; i <= 1 << 20; i <<= 1)
2985         {
2986           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2987           if (!memory_address_p (mem_mode, addr))
2988             break;
2989         }
2990       max_offset[mem_mode] = i == start ? 0 : i >> 1;
2991       off[mem_mode] = max_offset[mem_mode];
2992
2993       for (i = start; i <= 1 << 20; i <<= 1)
2994         {
2995           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2996           if (!memory_address_p (mem_mode, addr))
2997             break;
2998         }
2999       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3000
3001       if (dump_file && (dump_flags & TDF_DETAILS))
3002         {
3003           fprintf (dump_file, "get_address_cost:\n");
3004           fprintf (dump_file, "  min offset %s %d\n",
3005                    GET_MODE_NAME (mem_mode),
3006                    (int) min_offset[mem_mode]);
3007           fprintf (dump_file, "  max offset %s %d\n",
3008                    GET_MODE_NAME (mem_mode),
3009                    (int) max_offset[mem_mode]);
3010         }
3011
3012       rat[mem_mode] = 1;
3013       for (i = 2; i <= MAX_RATIO; i++)
3014         if (multiplier_allowed_in_address_p (i, mem_mode))
3015           {
3016             rat[mem_mode] = i;
3017             break;
3018           }
3019
3020       /* Compute the cost of various addressing modes.  */
3021       acost = 0;
3022       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3023       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3024
3025       for (i = 0; i < 16; i++)
3026         {
3027           sym_p = i & 1;
3028           var_p = (i >> 1) & 1;
3029           off_p = (i >> 2) & 1;
3030           rat_p = (i >> 3) & 1;
3031
3032           addr = reg0;
3033           if (rat_p)
3034             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3035                                    gen_int_mode (rat[mem_mode], Pmode));
3036
3037           if (var_p)
3038             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3039
3040           if (sym_p)
3041             {
3042               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3043               /* ??? We can run into trouble with some backends by presenting
3044                  it with symbols which havn't been properly passed through
3045                  targetm.encode_section_info.  By setting the local bit, we
3046                  enhance the probability of things working.  */
3047               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3048
3049               if (off_p)
3050                 base = gen_rtx_fmt_e (CONST, Pmode,
3051                                       gen_rtx_fmt_ee (PLUS, Pmode,
3052                                                       base,
3053                                                       gen_int_mode (off[mem_mode],
3054                                                                     Pmode)));
3055             }
3056           else if (off_p)
3057             base = gen_int_mode (off[mem_mode], Pmode);
3058           else
3059             base = NULL_RTX;
3060
3061           if (base)
3062             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3063
3064           start_sequence ();
3065           /* To avoid splitting addressing modes, pretend that no cse will
3066              follow.  */
3067           old_cse_not_expected = cse_not_expected;
3068           cse_not_expected = true;
3069           addr = memory_address (mem_mode, addr);
3070           cse_not_expected = old_cse_not_expected;
3071           seq = get_insns ();
3072           end_sequence ();
3073
3074           acost = seq_cost (seq);
3075           acost += address_cost (addr, mem_mode);
3076
3077           if (!acost)
3078             acost = 1;
3079           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3080         }
3081
3082       /* On some targets, it is quite expensive to load symbol to a register,
3083          which makes addresses that contain symbols look much more expensive.
3084          However, the symbol will have to be loaded in any case before the
3085          loop (and quite likely we have it in register already), so it does not
3086          make much sense to penalize them too heavily.  So make some final
3087          tweaks for the SYMBOL_PRESENT modes:
3088
3089          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3090          var is cheaper, use this mode with small penalty.
3091          If VAR_PRESENT is true, try whether the mode with
3092          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3093          if this is the case, use it.  */
3094       add_c = add_cost (Pmode);
3095       for (i = 0; i < 8; i++)
3096         {
3097           var_p = i & 1;
3098           off_p = (i >> 1) & 1;
3099           rat_p = (i >> 2) & 1;
3100
3101           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3102           if (var_p)
3103             acost += add_c;
3104
3105           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3106             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3107         }
3108
3109       if (dump_file && (dump_flags & TDF_DETAILS))
3110         {
3111           fprintf (dump_file, "Address costs:\n");
3112
3113           for (i = 0; i < 16; i++)
3114             {
3115               sym_p = i & 1;
3116               var_p = (i >> 1) & 1;
3117               off_p = (i >> 2) & 1;
3118               rat_p = (i >> 3) & 1;
3119
3120               fprintf (dump_file, "  ");
3121               if (sym_p)
3122                 fprintf (dump_file, "sym + ");
3123               if (var_p)
3124                 fprintf (dump_file, "var + ");
3125               if (off_p)
3126                 fprintf (dump_file, "cst + ");
3127               if (rat_p)
3128                 fprintf (dump_file, "rat * ");
3129
3130               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3131               fprintf (dump_file, "index costs %d\n", acost);
3132             }
3133           fprintf (dump_file, "\n");
3134         }
3135     }
3136
3137   bits = GET_MODE_BITSIZE (Pmode);
3138   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3139   offset &= mask;
3140   if ((offset >> (bits - 1) & 1))
3141     offset |= ~mask;
3142   s_offset = offset;
3143
3144   cost = 0;
3145   offset_p = (s_offset != 0
3146               && min_offset[mem_mode] <= s_offset
3147               && s_offset <= max_offset[mem_mode]);
3148   ratio_p = (ratio != 1
3149              && multiplier_allowed_in_address_p (ratio, mem_mode));
3150
3151   if (ratio != 1 && !ratio_p)
3152     cost += multiply_by_cost (ratio, Pmode);
3153
3154   if (s_offset && !offset_p && !symbol_present)
3155     cost += add_cost (Pmode);
3156
3157   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3158   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3159   return new_cost (cost + acost, complexity);
3160 }
3161
3162 /* Estimates cost of forcing expression EXPR into a variable.  */
3163
3164 static comp_cost
3165 force_expr_to_var_cost (tree expr)
3166 {
3167   static bool costs_initialized = false;
3168   static unsigned integer_cost;
3169   static unsigned symbol_cost;
3170   static unsigned address_cost;
3171   tree op0, op1;
3172   comp_cost cost0, cost1, cost;
3173   enum machine_mode mode;
3174
3175   if (!costs_initialized)
3176     {
3177       tree type = build_pointer_type (integer_type_node);
3178       tree var, addr;
3179       rtx x;
3180
3181       var = create_tmp_var_raw (integer_type_node, "test_var");
3182       TREE_STATIC (var) = 1;
3183       x = produce_memory_decl_rtl (var, NULL);
3184       SET_DECL_RTL (var, x);
3185
3186       integer_cost = computation_cost (build_int_cst (integer_type_node,
3187                                                       2000));
3188
3189       addr = build1 (ADDR_EXPR, type, var);
3190       symbol_cost = computation_cost (addr) + 1;
3191
3192       address_cost
3193         = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3194                                     addr,
3195                                     build_int_cst (sizetype, 2000))) + 1;
3196       if (dump_file && (dump_flags & TDF_DETAILS))
3197         {
3198           fprintf (dump_file, "force_expr_to_var_cost:\n");
3199           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3200           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3201           fprintf (dump_file, "  address %d\n", (int) address_cost);
3202           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3203           fprintf (dump_file, "\n");
3204         }
3205
3206       costs_initialized = true;
3207     }
3208
3209   STRIP_NOPS (expr);
3210
3211   if (SSA_VAR_P (expr))
3212     return zero_cost;
3213
3214   if (TREE_INVARIANT (expr))
3215     {
3216       if (TREE_CODE (expr) == INTEGER_CST)
3217         return new_cost (integer_cost, 0);
3218
3219       if (TREE_CODE (expr) == ADDR_EXPR)
3220         {
3221           tree obj = TREE_OPERAND (expr, 0);
3222
3223           if (TREE_CODE (obj) == VAR_DECL
3224               || TREE_CODE (obj) == PARM_DECL
3225               || TREE_CODE (obj) == RESULT_DECL)
3226             return new_cost (symbol_cost, 0);
3227         }
3228
3229       return new_cost (address_cost, 0);
3230     }
3231
3232   switch (TREE_CODE (expr))
3233     {
3234     case POINTER_PLUS_EXPR:
3235     case PLUS_EXPR:
3236     case MINUS_EXPR:
3237     case MULT_EXPR:
3238       op0 = TREE_OPERAND (expr, 0);
3239       op1 = TREE_OPERAND (expr, 1);
3240       STRIP_NOPS (op0);
3241       STRIP_NOPS (op1);
3242
3243       if (is_gimple_val (op0))
3244         cost0 = zero_cost;
3245       else
3246         cost0 = force_expr_to_var_cost (op0);
3247
3248       if (is_gimple_val (op1))
3249         cost1 = zero_cost;
3250       else
3251         cost1 = force_expr_to_var_cost (op1);
3252
3253       break;
3254
3255     default:
3256       /* Just an arbitrary value, FIXME.  */
3257       return new_cost (target_spill_cost, 0);
3258     }
3259
3260   mode = TYPE_MODE (TREE_TYPE (expr));
3261   switch (TREE_CODE (expr))
3262     {
3263     case POINTER_PLUS_EXPR:
3264     case PLUS_EXPR:
3265     case MINUS_EXPR:
3266       cost = new_cost (add_cost (mode), 0);
3267       break;
3268
3269     case MULT_EXPR:
3270       if (cst_and_fits_in_hwi (op0))
3271         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3272       else if (cst_and_fits_in_hwi (op1))
3273         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3274       else
3275         return new_cost (target_spill_cost, 0);
3276       break;
3277
3278     default:
3279       gcc_unreachable ();
3280     }
3281
3282   cost = add_costs (cost, cost0);
3283   cost = add_costs (cost, cost1);
3284
3285   /* Bound the cost by target_spill_cost.  The parts of complicated
3286      computations often are either loop invariant or at least can
3287      be shared between several iv uses, so letting this grow without
3288      limits would not give reasonable results.  */
3289   if (cost.cost > target_spill_cost)
3290     cost.cost = target_spill_cost;
3291
3292   return cost;
3293 }
3294
3295 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3296    invariants the computation depends on.  */
3297
3298 static comp_cost
3299 force_var_cost (struct ivopts_data *data,
3300                 tree expr, bitmap *depends_on)
3301 {
3302   if (depends_on)
3303     {
3304       fd_ivopts_data = data;
3305       walk_tree (&expr, find_depends, depends_on, NULL);
3306     }
3307
3308   return force_expr_to_var_cost (expr);
3309 }
3310
3311 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3312    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3313    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3314    invariants the computation depends on.  */
3315
3316 static comp_cost
3317 split_address_cost (struct ivopts_data *data,
3318                     tree addr, bool *symbol_present, bool *var_present,
3319                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3320 {
3321   tree core;
3322   HOST_WIDE_INT bitsize;
3323   HOST_WIDE_INT bitpos;
3324   tree toffset;
3325   enum machine_mode mode;
3326   int unsignedp, volatilep;
3327
3328   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3329                               &unsignedp, &volatilep, false);
3330
3331   if (toffset != 0
3332       || bitpos % BITS_PER_UNIT != 0
3333       || TREE_CODE (core) != VAR_DECL)
3334     {
3335       *symbol_present = false;
3336       *var_present = true;
3337       fd_ivopts_data = data;
3338       walk_tree (&addr, find_depends, depends_on, NULL);
3339       return new_cost (target_spill_cost, 0);
3340     }
3341
3342   *offset += bitpos / BITS_PER_UNIT;
3343   if (TREE_STATIC (core)
3344       || DECL_EXTERNAL (core))
3345     {
3346       *symbol_present = true;
3347       *var_present = false;
3348       return zero_cost;
3349     }
3350
3351   *symbol_present = false;
3352   *var_present = true;
3353   return zero_cost;
3354 }
3355
3356 /* Estimates cost of expressing difference of addresses E1 - E2 as
3357    var + symbol + offset.  The value of offset is added to OFFSET,
3358    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3359    part is missing.  DEPENDS_ON is a set of the invariants the computation
3360    depends on.  */
3361
3362 static comp_cost
3363 ptr_difference_cost (struct ivopts_data *data,
3364                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3365                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3366 {
3367   HOST_WIDE_INT diff = 0;
3368   comp_cost cost;
3369
3370   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3371
3372   if (ptr_difference_const (e1, e2, &diff))
3373     {
3374       *offset += diff;
3375       *symbol_present = false;
3376       *var_present = false;
3377       return zero_cost;
3378     }
3379
3380   if (integer_zerop (e2))
3381     return split_address_cost (data, TREE_OPERAND (e1, 0),
3382                                symbol_present, var_present, offset, depends_on);
3383
3384   *symbol_present = false;
3385   *var_present = true;
3386
3387   cost = force_var_cost (data, e1, depends_on);
3388   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3389   cost.cost += add_cost (Pmode);
3390
3391   return cost;
3392 }
3393
3394 /* Estimates cost of expressing difference E1 - E2 as
3395    var + symbol + offset.  The value of offset is added to OFFSET,
3396    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3397    part is missing.  DEPENDS_ON is a set of the invariants the computation
3398    depends on.  */
3399
3400 static comp_cost
3401 difference_cost (struct ivopts_data *data,
3402                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3403                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3404 {
3405   comp_cost cost;
3406   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3407   unsigned HOST_WIDE_INT off1, off2;
3408
3409   e1 = strip_offset (e1, &off1);
3410   e2 = strip_offset (e2, &off2);
3411   *offset += off1 - off2;
3412
3413   STRIP_NOPS (e1);
3414   STRIP_NOPS (e2);
3415
3416   if (TREE_CODE (e1) == ADDR_EXPR)
3417     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3418                                 depends_on);
3419   *symbol_present = false;
3420
3421   if (operand_equal_p (e1, e2, 0))
3422     {
3423       *var_present = false;
3424       return zero_cost;
3425     }
3426   *var_present = true;
3427   if (integer_zerop (e2))
3428     return force_var_cost (data, e1, depends_on);
3429
3430   if (integer_zerop (e1))
3431     {
3432       cost = force_var_cost (data, e2, depends_on);
3433       cost.cost += multiply_by_cost (-1, mode);
3434
3435       return cost;
3436     }
3437
3438   cost = force_var_cost (data, e1, depends_on);
3439   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3440   cost.cost += add_cost (mode);
3441
3442   return cost;
3443 }
3444
3445 /* Determines the cost of the computation by that USE is expressed
3446    from induction variable CAND.  If ADDRESS_P is true, we just need
3447    to create an address from it, otherwise we want to get it into
3448    register.  A set of invariants we depend on is stored in
3449    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3450
3451 static comp_cost
3452 get_computation_cost_at (struct ivopts_data *data,
3453                          struct iv_use *use, struct iv_cand *cand,
3454                          bool address_p, bitmap *depends_on, tree at)
3455 {
3456   tree ubase = use->iv->base, ustep = use->iv->step;
3457   tree cbase, cstep;
3458   tree utype = TREE_TYPE (ubase), ctype;
3459   unsigned HOST_WIDE_INT cstepi, offset = 0;
3460   HOST_WIDE_INT ratio, aratio;
3461   bool var_present, symbol_present;
3462   comp_cost cost;
3463   unsigned n_sums;
3464   double_int rat;
3465
3466   *depends_on = NULL;
3467
3468   /* Only consider real candidates.  */
3469   if (!cand->iv)
3470     return infinite_cost;
3471
3472   cbase = cand->iv->base;
3473   cstep = cand->iv->step;
3474   ctype = TREE_TYPE (cbase);
3475
3476   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3477     {
3478       /* We do not have a precision to express the values of use.  */
3479       return infinite_cost;
3480     }
3481
3482   if (address_p)
3483     {
3484       /* Do not try to express address of an object with computation based
3485          on address of a different object.  This may cause problems in rtl
3486          level alias analysis (that does not expect this to be happening,
3487          as this is illegal in C), and would be unlikely to be useful
3488          anyway.  */
3489       if (use->iv->base_object
3490           && cand->iv->base_object
3491           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3492         return infinite_cost;
3493     }
3494
3495   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3496     {
3497       /* TODO -- add direct handling of this case.  */
3498       goto fallback;
3499     }
3500
3501   /* CSTEPI is removed from the offset in case statement is after the
3502      increment.  If the step is not constant, we use zero instead.
3503      This is a bit imprecise (there is the extra addition), but
3504      redundancy elimination is likely to transform the code so that
3505      it uses value of the variable before increment anyway,
3506      so it is not that much unrealistic.  */
3507   if (cst_and_fits_in_hwi (cstep))
3508     cstepi = int_cst_value (cstep);
3509   else
3510     cstepi = 0;
3511
3512   if (!constant_multiple_of (ustep, cstep, &rat))
3513     return infinite_cost;
3514
3515   if (double_int_fits_in_shwi_p (rat))
3516     ratio = double_int_to_shwi (rat);
3517   else
3518     return infinite_cost;
3519
3520   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3521      or ratio == 1, it is better to handle this like
3522
3523      ubase - ratio * cbase + ratio * var
3524
3525      (also holds in the case ratio == -1, TODO.  */
3526
3527   if (cst_and_fits_in_hwi (cbase))
3528     {
3529       offset = - ratio * int_cst_value (cbase);
3530       cost = difference_cost (data,
3531                               ubase, build_int_cst (utype, 0),
3532                               &symbol_present, &var_present, &offset,
3533                               depends_on);
3534     }
3535   else if (ratio == 1)
3536     {
3537       cost = difference_cost (data,
3538                               ubase, cbase,
3539                               &symbol_present, &var_present, &offset,
3540                               depends_on);
3541     }
3542   else
3543     {
3544       cost = force_var_cost (data, cbase, depends_on);
3545       cost.cost += add_cost (TYPE_MODE (ctype));
3546       cost = add_costs (cost,
3547                         difference_cost (data,
3548                                          ubase, build_int_cst (utype, 0),
3549                                          &symbol_present, &var_present,
3550                                          &offset, depends_on));
3551     }
3552
3553   /* If we are after the increment, the value of the candidate is higher by
3554      one iteration.  */
3555   if (stmt_after_increment (data->current_loop, cand, at))
3556     offset -= ratio * cstepi;
3557
3558   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3559      (symbol/var/const parts may be omitted).  If we are looking for an address,
3560      find the cost of addressing this.  */
3561   if (address_p)
3562     return add_costs (cost, get_address_cost (symbol_present, var_present,
3563                                 offset, ratio,
3564                                 TYPE_MODE (TREE_TYPE (*use->op_p))));
3565
3566   /* Otherwise estimate the costs for computing the expression.  */
3567   aratio = ratio > 0 ? ratio : -ratio;
3568   if (!symbol_present && !var_present && !offset)
3569     {
3570       if (ratio != 1)
3571         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3572
3573       return cost;
3574     }
3575
3576   if (aratio != 1)
3577     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3578
3579   n_sums = 1;
3580   if (var_present
3581       /* Symbol + offset should be compile-time computable.  */
3582       && (symbol_present || offset))
3583     n_sums++;
3584
3585   /* Having offset does not affect runtime cost in case it is added to
3586      symbol, but it increases complexity.  */
3587   if (offset)
3588     cost.complexity++;
3589
3590   cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3591   return cost;
3592
3593 fallback:
3594   {
3595     /* Just get the expression, expand it and measure the cost.  */
3596     tree comp = get_computation_at (data->current_loop, use, cand, at);
3597
3598     if (!comp)
3599       return infinite_cost;
3600
3601     if (address_p)
3602       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3603
3604     return new_cost (computation_cost (comp), 0);
3605   }
3606 }
3607
3608 /* Determines the cost of the computation by that USE is expressed
3609    from induction variable CAND.  If ADDRESS_P is true, we just need
3610    to create an address from it, otherwise we want to get it into
3611    register.  A set of invariants we depend on is stored in
3612    DEPENDS_ON.  */
3613
3614 static comp_cost
3615 get_computation_cost (struct ivopts_data *data,
3616                       struct iv_use *use, struct iv_cand *cand,
3617                       bool address_p, bitmap *depends_on)
3618 {
3619   return get_computation_cost_at (data,
3620                                   use, cand, address_p, depends_on, use->stmt);
3621 }
3622
3623 /* Determines cost of basing replacement of USE on CAND in a generic
3624    expression.  */
3625
3626 static bool
3627 determine_use_iv_cost_generic (struct ivopts_data *data,
3628                                struct iv_use *use, struct iv_cand *cand)
3629 {
3630   bitmap depends_on;
3631   comp_cost cost;
3632
3633   /* The simple case first -- if we need to express value of the preserved
3634      original biv, the cost is 0.  This also prevents us from counting the
3635      cost of increment twice -- once at this use and once in the cost of
3636      the candidate.  */
3637   if (cand->pos == IP_ORIGINAL
3638       && cand->incremented_at == use->stmt)
3639     {
3640       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3641       return true;
3642     }
3643
3644   cost = get_computation_cost (data, use, cand, false, &depends_on);
3645   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3646
3647   return !infinite_cost_p (cost);
3648 }
3649
3650 /* Determines cost of basing replacement of USE on CAND in an address.  */
3651
3652 static bool
3653 determine_use_iv_cost_address (struct ivopts_data *data,
3654                                struct iv_use *use, struct iv_cand *cand)
3655 {
3656   bitmap depends_on;
3657   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3658
3659   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3660
3661   return !infinite_cost_p (cost);
3662 }
3663
3664 /* Computes value of candidate CAND at position AT in iteration NITER, and
3665    stores it to VAL.  */
3666
3667 static void
3668 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3669                aff_tree *val)
3670 {
3671   aff_tree step, delta, nit;
3672   struct iv *iv = cand->iv;
3673   tree type = TREE_TYPE (iv->base);
3674
3675   tree_to_aff_combination (iv->step, type, &step);
3676   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3677   aff_combination_convert (&nit, type);
3678   aff_combination_mult (&nit, &step, &delta);
3679   if (stmt_after_increment (loop, cand, at))
3680     aff_combination_add (&delta, &step);
3681
3682   tree_to_aff_combination (iv->base, type, val);
3683   aff_combination_add (val, &delta);
3684 }
3685
3686 /* Returns period of induction variable iv.  */
3687
3688 static tree
3689 iv_period (struct iv *iv)
3690 {
3691   tree step = iv->step, period, type;
3692   tree pow2div;
3693
3694   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3695
3696   /* Period of the iv is gcd (step, type range).  Since type range is power
3697      of two, it suffices to determine the maximum power of two that divides
3698      step.  */
3699   pow2div = num_ending_zeros (step);
3700   type = unsigned_type_for (TREE_TYPE (step));
3701
3702   period = build_low_bits_mask (type,
3703                                 (TYPE_PRECISION (type)
3704                                  - tree_low_cst (pow2div, 1)));
3705
3706   return period;
3707 }
3708
3709 /* Returns the comparison operator used when eliminating the iv USE.  */
3710
3711 static enum tree_code
3712 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3713 {
3714   struct loop *loop = data->current_loop;
3715   basic_block ex_bb;
3716   edge exit;
3717
3718   ex_bb = bb_for_stmt (use->stmt);
3719   exit = EDGE_SUCC (ex_bb, 0);
3720   if (flow_bb_inside_loop_p (loop, exit->dest))
3721     exit = EDGE_SUCC (ex_bb, 1);
3722
3723   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3724 }
3725
3726 /* Check whether it is possible to express the condition in USE by comparison
3727    of candidate CAND.  If so, store the value compared with to BOUND.  */
3728
3729 static bool
3730 may_eliminate_iv (struct ivopts_data *data,
3731                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3732 {
3733   basic_block ex_bb;
3734   edge exit;
3735   tree nit, period;
3736   struct loop *loop = data->current_loop;
3737   aff_tree bnd;
3738   double_int period_value, max_niter;
3739
3740   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3741     return false;
3742
3743   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3744      for other conditions inside loop body.  */
3745   ex_bb = bb_for_stmt (use->stmt);
3746   if (use->stmt != last_stmt (ex_bb)
3747       || TREE_CODE (use->stmt) != COND_EXPR)
3748     return false;
3749   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3750     return false;
3751
3752   exit = EDGE_SUCC (ex_bb, 0);
3753   if (flow_bb_inside_loop_p (loop, exit->dest))
3754     exit = EDGE_SUCC (ex_bb, 1);
3755   if (flow_bb_inside_loop_p (loop, exit->dest))
3756     return false;
3757
3758   nit = niter_for_exit (data, exit);
3759   if (!nit)
3760     return false;
3761
3762   /* Determine whether we may use the variable to test whether niter iterations
3763      elapsed.  This is the case iff the period of the induction variable is
3764      greater than the number of iterations.  */
3765   period = iv_period (cand->iv);
3766   if (!period)
3767     return false;
3768
3769   /* Compare the period with the estimate on the number of iterations of the
3770      loop.  */
3771   if (!estimated_loop_iterations (loop, true, &max_niter))
3772     return false;
3773   period_value = tree_to_double_int (period);
3774   if (double_int_ucmp (period_value, max_niter) <= 0)
3775     return false;
3776
3777   cand_value_at (loop, cand, use->stmt, nit, &bnd);
3778   *bound = aff_combination_to_tree (&bnd);
3779   return true;
3780 }
3781
3782 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3783
3784 static bool
3785 determine_use_iv_cost_condition (struct ivopts_data *data,
3786                                  struct iv_use *use, struct iv_cand *cand)
3787 {
3788   tree bound = NULL_TREE;
3789   struct iv *cmp_iv;
3790   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3791   comp_cost elim_cost, express_cost, cost;
3792   bool ok;
3793
3794   /* Only consider real candidates.  */
3795   if (!cand->iv)
3796     {
3797       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3798       return false;
3799     }
3800
3801   /* Try iv elimination.  */
3802   if (may_eliminate_iv (data, use, cand, &bound))
3803     {
3804       elim_cost = force_var_cost (data, bound, &depends_on_elim);
3805       /* The bound is a loop invariant, so it will be only computed
3806          once.  */
3807       elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3808     }
3809   else
3810     elim_cost = infinite_cost;
3811
3812   /* Try expressing the original giv.  If it is compared with an invariant,
3813      note that we cannot get rid of it.  */
3814   ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3815   gcc_assert (ok);
3816
3817   express_cost = get_computation_cost (data, use, cand, false,
3818                                        &depends_on_express);
3819   fd_ivopts_data = data;
3820   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3821
3822   /* Choose the better approach.  */
3823   if (compare_costs (elim_cost, express_cost) < 0)
3824     {
3825       cost = elim_cost;
3826       depends_on = depends_on_elim;
3827       depends_on_elim = NULL;
3828     }
3829   else
3830     {
3831       cost = express_cost;
3832       depends_on = depends_on_express;
3833       depends_on_express = NULL;
3834       bound = NULL_TREE;
3835     }
3836
3837   set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3838
3839   if (depends_on_elim)
3840     BITMAP_FREE (depends_on_elim);
3841   if (depends_on_express)
3842     BITMAP_FREE (depends_on_express);
3843
3844   return !infinite_cost_p (cost);
3845 }
3846
3847 /* Determines cost of basing replacement of USE on CAND.  Returns false
3848    if USE cannot be based on CAND.  */
3849
3850 static bool
3851 determine_use_iv_cost (struct ivopts_data *data,
3852                        struct iv_use *use, struct iv_cand *cand)
3853 {
3854   switch (use->type)
3855     {
3856     case USE_NONLINEAR_EXPR:
3857       return determine_use_iv_cost_generic (data, use, cand);
3858
3859     case USE_ADDRESS:
3860       return determine_use_iv_cost_address (data, use, cand);
3861
3862     case USE_COMPARE:
3863       return determine_use_iv_cost_condition (data, use, cand);
3864
3865     default:
3866       gcc_unreachable ();
3867     }
3868 }
3869
3870 /* Determines costs of basing the use of the iv on an iv candidate.  */
3871
3872 static void
3873 determine_use_iv_costs (struct ivopts_data *data)
3874 {
3875   unsigned i, j;
3876   struct iv_use *use;
3877   struct iv_cand *cand;
3878   bitmap to_clear = BITMAP_ALLOC (NULL);
3879
3880   alloc_use_cost_map (data);
3881
3882   for (i = 0; i < n_iv_uses (data); i++)
3883     {
3884       use = iv_use (data, i);
3885
3886       if (data->consider_all_candidates)
3887         {
3888           for (j = 0; j < n_iv_cands (data); j++)
3889             {
3890               cand = iv_cand (data, j);
3891               determine_use_iv_cost (data, use, cand);
3892             }
3893         }
3894       else
3895         {
3896           bitmap_iterator bi;
3897
3898           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3899             {
3900               cand = iv_cand (data, j);
3901               if (!determine_use_iv_cost (data, use, cand))
3902                 bitmap_set_bit (to_clear, j);
3903             }
3904
3905           /* Remove the candidates for that the cost is infinite from
3906              the list of related candidates.  */
3907           bitmap_and_compl_into (use->related_cands, to_clear);
3908           bitmap_clear (to_clear);
3909         }
3910     }
3911
3912   BITMAP_FREE (to_clear);
3913
3914   if (dump_file && (dump_flags & TDF_DETAILS))
3915     {
3916       fprintf (dump_file, "Use-candidate costs:\n");
3917
3918       for (i = 0; i < n_iv_uses (data); i++)
3919         {
3920           use = iv_use (data, i);
3921
3922           fprintf (dump_file, "Use %d:\n", i);
3923           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
3924           for (j = 0; j < use->n_map_members; j++)
3925             {
3926               if (!use->cost_map[j].cand
3927                   || infinite_cost_p (use->cost_map[j].cost))
3928                 continue;
3929
3930               fprintf (dump_file, "  %d\t%d\t%d\t",
3931                        use->cost_map[j].cand->id,
3932                        use->cost_map[j].cost.cost,
3933                        use->cost_map[j].cost.complexity);
3934               if (use->cost_map[j].depends_on)
3935                 bitmap_print (dump_file,
3936                               use->cost_map[j].depends_on, "","");
3937               fprintf (dump_file, "\n");
3938             }
3939
3940           fprintf (dump_file, "\n");
3941         }
3942       fprintf (dump_file, "\n");
3943     }
3944 }
3945
3946 /* Determines cost of the candidate CAND.  */
3947
3948 static void
3949 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3950 {
3951   comp_cost cost_base;
3952   unsigned cost, cost_step;
3953   tree base;
3954
3955   if (!cand->iv)
3956     {
3957       cand->cost = 0;
3958       return;
3959     }
3960
3961   /* There are two costs associated with the candidate -- its increment
3962      and its initialization.  The second is almost negligible for any loop
3963      that rolls enough, so we take it just very little into account.  */
3964
3965   base = cand->iv->base;
3966   cost_base = force_var_cost (data, base, NULL);
3967   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3968
3969   cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
3970
3971   /* Prefer the original ivs unless we may gain something by replacing it.
3972      The reason is to makee debugging simpler; so this is not relevant for
3973      artificial ivs created by other optimization passes.  */
3974   if (cand->pos != IP_ORIGINAL
3975       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3976     cost++;
3977
3978   /* Prefer not to insert statements into latch unless there are some
3979      already (so that we do not create unnecessary jumps).  */
3980   if (cand->pos == IP_END
3981       && empty_block_p (ip_end_pos (data->current_loop)))
3982     cost++;
3983
3984   cand->cost = cost;
3985 }
3986
3987 /* Determines costs of computation of the candidates.  */
3988
3989 static void
3990 determine_iv_costs (struct ivopts_data *data)
3991 {
3992   unsigned i;
3993
3994   if (dump_file && (dump_flags & TDF_DETAILS))
3995     {
3996       fprintf (dump_file, "Candidate costs:\n");
3997       fprintf (dump_file, "  cand\tcost\n");
3998     }
3999
4000   for (i = 0; i < n_iv_cands (data); i++)
4001     {
4002       struct iv_cand *cand = iv_cand (data, i);
4003
4004       determine_iv_cost (data, cand);
4005
4006       if (dump_file && (dump_flags & TDF_DETAILS))
4007         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4008     }
4009
4010   if (dump_file && (dump_flags & TDF_DETAILS))
4011     fprintf (dump_file, "\n");
4012 }
4013
4014 /* Calculates cost for having SIZE induction variables.  */
4015
4016 static unsigned
4017 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4018 {
4019   /* We add size to the cost, so that we prefer eliminating ivs
4020      if possible.  */
4021   return size + estimate_reg_pressure_cost (size, data->regs_used);
4022 }
4023
4024 /* For each size of the induction variable set determine the penalty.  */
4025
4026 static void
4027 determine_set_costs (struct ivopts_data *data)
4028 {
4029   unsigned j, n;
4030   tree phi, op;
4031   struct loop *loop = data->current_loop;
4032   bitmap_iterator bi;
4033
4034   /* We use the following model (definitely improvable, especially the
4035      cost function -- TODO):
4036
4037      We estimate the number of registers available (using MD data), name it A.
4038
4039      We estimate the number of registers used by the loop, name it U.  This
4040      number is obtained as the number of loop phi nodes (not counting virtual
4041      registers and bivs) + the number of variables from outside of the loop.
4042
4043      We set a reserve R (free regs that are used for temporary computations,
4044      etc.).  For now the reserve is a constant 3.
4045
4046      Let I be the number of induction variables.
4047
4048      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4049         make a lot of ivs without a reason).
4050      -- if A - R < U + I <= A, the cost is I * PRES_COST
4051      -- if U + I > A, the cost is I * PRES_COST and
4052         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4053
4054   if (dump_file && (dump_flags & TDF_DETAILS))
4055     {
4056       fprintf (dump_file, "Global costs:\n");
4057       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4058       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost);
4059       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4060     }
4061
4062   n = 0;
4063   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4064     {
4065       op = PHI_RESULT (phi);
4066
4067       if (!is_gimple_reg (op))
4068         continue;
4069
4070       if (get_iv (data, op))
4071         continue;
4072
4073       n++;
4074     }
4075
4076   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4077     {
4078       struct version_info *info = ver_info (data, j);
4079
4080       if (info->inv_id && info->has_nonlin_use)
4081         n++;
4082     }
4083
4084   data->regs_used = n;
4085   if (dump_file && (dump_flags & TDF_DETAILS))
4086     fprintf (dump_file, "  regs_used %d\n", n);
4087
4088   if (dump_file && (dump_flags & TDF_DETAILS))
4089     {
4090       fprintf (dump_file, "  cost for size:\n");
4091       fprintf (dump_file, "  ivs\tcost\n");
4092       for (j = 0; j <= 2 * target_avail_regs; j++)
4093         fprintf (dump_file, "  %d\t%d\n", j,
4094                  ivopts_global_cost_for_size (data, j));
4095       fprintf (dump_file, "\n");
4096     }
4097 }
4098
4099 /* Returns true if A is a cheaper cost pair than B.  */
4100
4101 static bool
4102 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4103 {
4104   int cmp;
4105
4106   if (!a)
4107     return false;
4108
4109   if (!b)
4110     return true;
4111
4112   cmp = compare_costs (a->cost, b->cost);
4113   if (cmp < 0)
4114     return true;
4115
4116   if (cmp > 0)
4117     return false;
4118
4119   /* In case the costs are the same, prefer the cheaper candidate.  */
4120   if (a->cand->cost < b->cand->cost)
4121     return true;
4122
4123   return false;
4124 }
4125
4126 /* Computes the cost field of IVS structure.  */
4127
4128 static void
4129 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4130 {
4131   comp_cost cost = ivs->cand_use_cost;
4132   cost.cost += ivs->cand_cost;
4133   cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4134
4135   ivs->cost = cost;
4136 }
4137
4138 /* Remove invariants in set INVS to set IVS.  */
4139
4140 static void
4141 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4142 {
4143   bitmap_iterator bi;
4144   unsigned iid;
4145
4146   if (!invs)
4147     return;
4148
4149   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4150     {
4151       ivs->n_invariant_uses[iid]--;
4152       if (ivs->n_invariant_uses[iid] == 0)
4153         ivs->n_regs--;
4154     }
4155 }
4156
4157 /* Set USE not to be expressed by any candidate in IVS.  */
4158
4159 static void
4160 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4161                  struct iv_use *use)
4162 {
4163   unsigned uid = use->id, cid;
4164   struct cost_pair *cp;
4165
4166   cp = ivs->cand_for_use[uid];
4167   if (!cp)
4168     return;
4169   cid = cp->cand->id;
4170
4171   ivs->bad_uses++;
4172   ivs->cand_for_use[uid] = NULL;
4173   ivs->n_cand_uses[cid]--;
4174
4175   if (ivs->n_cand_uses[cid] == 0)
4176     {
4177       bitmap_clear_bit (ivs->cands, cid);
4178       /* Do not count the pseudocandidates.  */
4179       if (cp->cand->iv)
4180         ivs->n_regs--;
4181       ivs->n_cands--;
4182       ivs->cand_cost -= cp->cand->cost;
4183
4184       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4185     }
4186
4187   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4188
4189   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4190   iv_ca_recount_cost (data, ivs);
4191 }
4192
4193 /* Add invariants in set INVS to set IVS.  */
4194
4195 static void
4196 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4197 {
4198   bitmap_iterator bi;
4199   unsigned iid;
4200
4201   if (!invs)
4202     return;
4203
4204   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4205     {
4206       ivs->n_invariant_uses[iid]++;
4207       if (ivs->n_invariant_uses[iid] == 1)
4208         ivs->n_regs++;
4209     }
4210 }
4211
4212 /* Set cost pair for USE in set IVS to CP.  */
4213
4214 static void
4215 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4216               struct iv_use *use, struct cost_pair *cp)
4217 {
4218   unsigned uid = use->id, cid;
4219
4220   if (ivs->cand_for_use[uid] == cp)
4221     return;
4222
4223   if (ivs->cand_for_use[uid])
4224     iv_ca_set_no_cp (data, ivs, use);
4225
4226   if (cp)
4227     {
4228       cid = cp->cand->id;
4229
4230       ivs->bad_uses--;
4231       ivs->cand_for_use[uid] = cp;
4232       ivs->n_cand_uses[cid]++;
4233       if (ivs->n_cand_uses[cid] == 1)
4234         {
4235           bitmap_set_bit (ivs->cands, cid);
4236           /* Do not count the pseudocandidates.  */
4237           if (cp->cand->iv)
4238             ivs->n_regs++;
4239           ivs->n_cands++;
4240           ivs->cand_cost += cp->cand->cost;
4241
4242           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4243         }
4244
4245       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4246       iv_ca_set_add_invariants (ivs, cp->depends_on);
4247       iv_ca_recount_cost (data, ivs);
4248     }
4249 }
4250
4251 /* Extend set IVS by expressing USE by some of the candidates in it
4252    if possible.  */
4253
4254 static void
4255 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4256                struct iv_use *use)
4257 {
4258   struct cost_pair *best_cp = NULL, *cp;
4259   bitmap_iterator bi;
4260   unsigned i;
4261
4262   gcc_assert (ivs->upto >= use->id);
4263
4264   if (ivs->upto == use->id)
4265     {
4266       ivs->upto++;
4267       ivs->bad_uses++;
4268     }
4269
4270   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4271     {
4272       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4273
4274       if (cheaper_cost_pair (cp, best_cp))
4275         best_cp = cp;
4276     }
4277
4278   iv_ca_set_cp (data, ivs, use, best_cp);
4279 }
4280
4281 /* Get cost for assignment IVS.  */
4282
4283 static comp_cost
4284 iv_ca_cost (struct iv_ca *ivs)
4285 {
4286   return (ivs->bad_uses ? infinite_cost : ivs->cost);
4287 }
4288
4289 /* Returns true if all dependences of CP are among invariants in IVS.  */
4290
4291 static bool
4292 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4293 {
4294   unsigned i;
4295   bitmap_iterator bi;
4296
4297   if (!cp->depends_on)
4298     return true;
4299
4300   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4301     {
4302       if (ivs->n_invariant_uses[i] == 0)
4303         return false;
4304     }
4305
4306   return true;
4307 }
4308
4309 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4310    it before NEXT_CHANGE.  */
4311
4312 static struct iv_ca_delta *
4313 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4314                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4315 {
4316   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4317
4318   change->use = use;
4319   change->old_cp = old_cp;
4320   change->new_cp = new_cp;
4321   change->next_change = next_change;
4322
4323   return change;
4324 }
4325
4326 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4327    are rewritten.  */
4328
4329 static struct iv_ca_delta *
4330 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4331 {
4332   struct iv_ca_delta *last;
4333
4334   if (!l2)
4335     return l1;
4336
4337   if (!l1)
4338     return l2;
4339
4340   for (last = l1; last->next_change; last = last->next_change)
4341     continue;
4342   last->next_change = l2;
4343
4344   return l1;
4345 }
4346
4347 /* Returns candidate by that USE is expressed in IVS.  */
4348
4349 static struct cost_pair *
4350 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4351 {
4352   return ivs->cand_for_use[use->id];
4353 }
4354
4355 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4356
4357 static struct iv_ca_delta *
4358 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4359 {
4360   struct iv_ca_delta *act, *next, *prev = NULL;
4361   struct cost_pair *tmp;
4362
4363   for (act = delta; act; act = next)
4364     {
4365       next = act->next_change;
4366       act->next_change = prev;
4367       prev = act;
4368
4369       tmp = act->old_cp;
4370       act->old_cp = act->new_cp;
4371       act->new_cp = tmp;
4372     }
4373
4374   return prev;
4375 }
4376
4377 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4378    reverted instead.  */
4379
4380 static void
4381 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4382                     struct iv_ca_delta *delta, bool forward)
4383 {
4384   struct cost_pair *from, *to;
4385   struct iv_ca_delta *act;
4386
4387   if (!forward)
4388     delta = iv_ca_delta_reverse (delta);
4389
4390   for (act = delta; act; act = act->next_change)
4391     {
4392       from = act->old_cp;
4393       to = act->new_cp;
4394       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4395       iv_ca_set_cp (data, ivs, act->use, to);
4396     }
4397
4398   if (!forward)
4399     iv_ca_delta_reverse (delta);
4400 }
4401
4402 /* Returns true if CAND is used in IVS.  */
4403
4404 static bool
4405 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4406 {
4407   return ivs->n_cand_uses[cand->id] > 0;
4408 }
4409
4410 /* Returns number of induction variable candidates in the set IVS.  */
4411
4412 static unsigned
4413 iv_ca_n_cands (struct iv_ca *ivs)
4414 {
4415   return ivs->n_cands;
4416 }
4417
4418 /* Free the list of changes DELTA.  */
4419
4420 static void
4421 iv_ca_delta_free (struct iv_ca_delta **delta)
4422 {
4423   struct iv_ca_delta *act, *next;
4424
4425   for (act = *delta; act; act = next)
4426     {
4427       next = act->next_change;
4428       free (act);
4429     }
4430
4431   *delta = NULL;
4432 }
4433
4434 /* Allocates new iv candidates assignment.  */
4435
4436 static struct iv_ca *
4437 iv_ca_new (struct ivopts_data *data)
4438 {
4439   struct iv_ca *nw = XNEW (struct iv_ca);
4440
4441   nw->upto = 0;
4442   nw->bad_uses = 0;
4443   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4444   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4445   nw->cands = BITMAP_ALLOC (NULL);
4446   nw->n_cands = 0;
4447   nw->n_regs = 0;
4448   nw->cand_use_cost = zero_cost;
4449   nw->cand_cost = 0;
4450   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4451   nw->cost = zero_cost;
4452
4453   return nw;
4454 }
4455
4456 /* Free memory occupied by the set IVS.  */
4457
4458 static void
4459 iv_ca_free (struct iv_ca **ivs)
4460 {
4461   free ((*ivs)->cand_for_use);
4462   free ((*ivs)->n_cand_uses);
4463   BITMAP_FREE ((*ivs)->cands);
4464   free ((*ivs)->n_invariant_uses);
4465   free (*ivs);
4466   *ivs = NULL;
4467 }
4468
4469 /* Dumps IVS to FILE.  */
4470
4471 static void
4472 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4473 {
4474   const char *pref = "  invariants ";
4475   unsigned i;
4476   comp_cost cost = iv_ca_cost (ivs);
4477
4478   fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
4479   bitmap_print (file, ivs->cands, "  candidates ","\n");
4480
4481   for (i = 1; i <= data->max_inv_id; i++)
4482     if (ivs->n_invariant_uses[i])
4483       {
4484         fprintf (file, "%s%d", pref, i);
4485         pref = ", ";
4486       }
4487   fprintf (file, "\n");
4488 }
4489
4490 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4491    new set, and store differences in DELTA.  Number of induction variables
4492    in the new set is stored to N_IVS.  */
4493
4494 static comp_cost
4495 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4496               struct iv_cand *cand, struct iv_ca_delta **delta,
4497               unsigned *n_ivs)
4498 {
4499   unsigned i;
4500   comp_cost cost;
4501   struct iv_use *use;
4502   struct cost_pair *old_cp, *new_cp;
4503
4504   *delta = NULL;
4505   for (i = 0; i < ivs->upto; i++)
4506     {
4507       use = iv_use (data, i);
4508       old_cp = iv_ca_cand_for_use (ivs, use);
4509
4510       if (old_cp
4511           && old_cp->cand == cand)
4512         continue;
4513
4514       new_cp = get_use_iv_cost (data, use, cand);
4515       if (!new_cp)
4516         continue;
4517
4518       if (!iv_ca_has_deps (ivs, new_cp))
4519         continue;
4520
4521       if (!cheaper_cost_pair (new_cp, old_cp))
4522         continue;
4523
4524       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4525     }
4526
4527   iv_ca_delta_commit (data, ivs, *delta, true);
4528   cost = iv_ca_cost (ivs);
4529   if (n_ivs)
4530     *n_ivs = iv_ca_n_cands (ivs);
4531   iv_ca_delta_commit (data, ivs, *delta, false);
4532
4533   return cost;
4534 }
4535
4536 /* Try narrowing set IVS by removing CAND.  Return the cost of
4537    the new set and store the differences in DELTA.  */
4538
4539 static comp_cost
4540 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4541               struct iv_cand *cand, struct iv_ca_delta **delta)
4542 {
4543   unsigned i, ci;
4544   struct iv_use *use;
4545   struct cost_pair *old_cp, *new_cp, *cp;
4546   bitmap_iterator bi;
4547   struct iv_cand *cnd;
4548   comp_cost cost;
4549
4550   *delta = NULL;
4551   for (i = 0; i < n_iv_uses (data); i++)
4552     {
4553       use = iv_use (data, i);
4554
4555       old_cp = iv_ca_cand_for_use (ivs, use);
4556       if (old_cp->cand != cand)
4557         continue;
4558
4559       new_cp = NULL;
4560
4561       if (data->consider_all_candidates)
4562         {
4563           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4564             {
4565               if (ci == cand->id)
4566                 continue;
4567
4568               cnd = iv_cand (data, ci);
4569
4570               cp = get_use_iv_cost (data, use, cnd);
4571               if (!cp)
4572                 continue;
4573               if (!iv_ca_has_deps (ivs, cp))
4574                 continue;
4575
4576               if (!cheaper_cost_pair (cp, new_cp))
4577                 continue;
4578
4579               new_cp = cp;
4580             }
4581         }
4582       else
4583         {
4584           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4585             {
4586               if (ci == cand->id)
4587                 continue;
4588
4589               cnd = iv_cand (data, ci);
4590
4591               cp = get_use_iv_cost (data, use, cnd);
4592               if (!cp)
4593                 continue;
4594               if (!iv_ca_has_deps (ivs, cp))
4595                 continue;
4596
4597               if (!cheaper_cost_pair (cp, new_cp))
4598                 continue;
4599
4600               new_cp = cp;
4601             }
4602         }
4603
4604       if (!new_cp)
4605         {
4606           iv_ca_delta_free (delta);
4607           return infinite_cost;
4608         }
4609
4610       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4611     }
4612
4613   iv_ca_delta_commit (data, ivs, *delta, true);
4614   cost = iv_ca_cost (ivs);
4615   iv_ca_delta_commit (data, ivs, *delta, false);
4616
4617   return cost;
4618 }
4619
4620 /* Try optimizing the set of candidates IVS by removing candidates different
4621    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4622    differences in DELTA.  */
4623
4624 static comp_cost
4625 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4626              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4627 {
4628   bitmap_iterator bi;
4629   struct iv_ca_delta *act_delta, *best_delta;
4630   unsigned i;
4631   comp_cost best_cost, acost;
4632   struct iv_cand *cand;
4633
4634   best_delta = NULL;
4635   best_cost = iv_ca_cost (ivs);
4636
4637   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4638     {
4639       cand = iv_cand (data, i);
4640
4641       if (cand == except_cand)
4642         continue;
4643
4644       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4645
4646       if (compare_costs (acost, best_cost) < 0)
4647         {
4648           best_cost = acost;
4649           iv_ca_delta_free (&best_delta);
4650           best_delta = act_delta;
4651         }
4652       else
4653         iv_ca_delta_free (&act_delta);
4654     }
4655
4656   if (!best_delta)
4657     {
4658       *delta = NULL;
4659       return best_cost;
4660     }
4661
4662   /* Recurse to possibly remove other unnecessary ivs.  */
4663   iv_ca_delta_commit (data, ivs, best_delta, true);
4664   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4665   iv_ca_delta_commit (data, ivs, best_delta, false);
4666   *delta = iv_ca_delta_join (best_delta, *delta);
4667   return best_cost;
4668 }
4669
4670 /* Tries to extend the sets IVS in the best possible way in order
4671    to express the USE.  */
4672
4673 static bool
4674 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4675                   struct iv_use *use)
4676 {
4677   comp_cost best_cost, act_cost;
4678   unsigned i;
4679   bitmap_iterator bi;
4680   struct iv_cand *cand;
4681   struct iv_ca_delta *best_delta = NULL, *act_delta;
4682   struct cost_pair *cp;
4683
4684   iv_ca_add_use (data, ivs, use);
4685   best_cost = iv_ca_cost (ivs);
4686
4687   cp = iv_ca_cand_for_use (ivs, use);
4688   if (cp)
4689     {
4690       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4691       iv_ca_set_no_cp (data, ivs, use);
4692     }
4693
4694   /* First try important candidates not based on any memory object.  Only if
4695      this fails, try the specific ones.  Rationale -- in loops with many
4696      variables the best choice often is to use just one generic biv.  If we
4697      added here many ivs specific to the uses, the optimization algorithm later
4698      would be likely to get stuck in a local minimum, thus causing us to create
4699      too many ivs.  The approach from few ivs to more seems more likely to be
4700      successful -- starting from few ivs, replacing an expensive use by a
4701      specific iv should always be a win.  */
4702   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4703     {
4704       cand = iv_cand (data, i);
4705
4706       if (cand->iv->base_object != NULL_TREE)
4707         continue;
4708
4709       if (iv_ca_cand_used_p (ivs, cand))
4710         continue;
4711
4712       cp = get_use_iv_cost (data, use, cand);
4713       if (!cp)
4714         continue;
4715
4716       iv_ca_set_cp (data, ivs, use, cp);
4717       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4718       iv_ca_set_no_cp (data, ivs, use);
4719       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4720
4721       if (compare_costs (act_cost, best_cost) < 0)
4722         {
4723           best_cost = act_cost;
4724
4725           iv_ca_delta_free (&best_delta);
4726           best_delta = act_delta;
4727         }
4728       else
4729         iv_ca_delta_free (&act_delta);
4730     }
4731
4732   if (infinite_cost_p (best_cost))
4733     {
4734       for (i = 0; i < use->n_map_members; i++)
4735         {
4736           cp = use->cost_map + i;
4737           cand = cp->cand;
4738           if (!cand)
4739             continue;
4740
4741           /* Already tried this.  */
4742           if (cand->important && cand->iv->base_object == NULL_TREE)
4743             continue;
4744
4745           if (iv_ca_cand_used_p (ivs, cand))
4746             continue;
4747
4748           act_delta = NULL;
4749           iv_ca_set_cp (data, ivs, use, cp);
4750           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4751           iv_ca_set_no_cp (data, ivs, use);
4752           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4753                                        cp, act_delta);
4754
4755           if (compare_costs (act_cost, best_cost) < 0)
4756             {
4757               best_cost = act_cost;
4758
4759               if (best_delta)
4760                 iv_ca_delta_free (&best_delta);
4761               best_delta = act_delta;
4762             }
4763           else
4764             iv_ca_delta_free (&act_delta);
4765         }
4766     }
4767
4768   iv_ca_delta_commit (data, ivs, best_delta, true);
4769   iv_ca_delta_free (&best_delta);
4770
4771   return !infinite_cost_p (best_cost);
4772 }
4773
4774 /* Finds an initial assignment of candidates to uses.  */
4775
4776 static struct iv_ca *
4777 get_initial_solution (struct ivopts_data *data)
4778 {
4779   struct iv_ca *ivs = iv_ca_new (data);
4780   unsigned i;
4781
4782   for (i = 0; i < n_iv_uses (data); i++)
4783     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4784       {
4785         iv_ca_free (&ivs);
4786         return NULL;
4787       }
4788
4789   return ivs;
4790 }
4791
4792 /* Tries to improve set of induction variables IVS.  */
4793
4794 static bool
4795 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4796 {
4797   unsigned i, n_ivs;
4798   comp_cost acost, best_cost = iv_ca_cost (ivs);
4799   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4800   struct iv_cand *cand;
4801
4802   /* Try extending the set of induction variables by one.  */
4803   for (i = 0; i < n_iv_cands (data); i++)
4804     {
4805       cand = iv_cand (data, i);
4806
4807       if (iv_ca_cand_used_p (ivs, cand))
4808         continue;
4809
4810       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4811       if (!act_delta)
4812         continue;
4813
4814       /* If we successfully added the candidate and the set is small enough,
4815          try optimizing it by removing other candidates.  */
4816       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4817         {
4818           iv_ca_delta_commit (data, ivs, act_delta, true);
4819           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4820           iv_ca_delta_commit (data, ivs, act_delta, false);
4821           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4822         }
4823
4824       if (compare_costs (acost, best_cost) < 0)
4825         {
4826           best_cost = acost;
4827           iv_ca_delta_free (&best_delta);
4828           best_delta = act_delta;
4829         }
4830       else
4831         iv_ca_delta_free (&act_delta);
4832     }
4833
4834   if (!best_delta)
4835     {
4836       /* Try removing the candidates from the set instead.  */
4837       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4838
4839       /* Nothing more we can do.  */
4840       if (!best_delta)
4841         return false;
4842     }
4843
4844   iv_ca_delta_commit (data, ivs, best_delta, true);
4845   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4846   iv_ca_delta_free (&best_delta);
4847   return true;
4848 }
4849
4850 /* Attempts to find the optimal set of induction variables.  We do simple
4851    greedy heuristic -- we try to replace at most one candidate in the selected
4852    solution and remove the unused ivs while this improves the cost.  */
4853
4854 static struct iv_ca *
4855 find_optimal_iv_set (struct ivopts_data *data)
4856 {
4857   unsigned i;
4858   struct iv_ca *set;
4859   struct iv_use *use;
4860
4861   /* Get the initial solution.  */
4862   set = get_initial_solution (data);
4863   if (!set)
4864     {
4865       if (dump_file && (dump_flags & TDF_DETAILS))
4866         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4867       return NULL;
4868     }
4869
4870   if (dump_file && (dump_flags & TDF_DETAILS))
4871     {
4872       fprintf (dump_file, "Initial set of candidates:\n");
4873       iv_ca_dump (data, dump_file, set);
4874     }
4875
4876   while (try_improve_iv_set (data, set))
4877     {
4878       if (dump_file && (dump_flags & TDF_DETAILS))
4879         {
4880           fprintf (dump_file, "Improved to:\n");
4881           iv_ca_dump (data, dump_file, set);
4882         }
4883     }
4884
4885   if (dump_file && (dump_flags & TDF_DETAILS))
4886     {
4887       comp_cost cost = iv_ca_cost (set);
4888       fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4889     }
4890
4891   for (i = 0; i < n_iv_uses (data); i++)
4892     {
4893       use = iv_use (data, i);
4894       use->selected = iv_ca_cand_for_use (set, use)->cand;
4895     }
4896
4897   return set;
4898 }
4899
4900 /* Creates a new induction variable corresponding to CAND.  */
4901
4902 static void
4903 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4904 {
4905   block_stmt_iterator incr_pos;
4906   tree base;
4907   bool after = false;
4908
4909   if (!cand->iv)
4910     return;
4911
4912   switch (cand->pos)
4913     {
4914     case IP_NORMAL:
4915       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4916       break;
4917
4918     case IP_END:
4919       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4920       after = true;
4921       break;
4922
4923     case IP_ORIGINAL:
4924       /* Mark that the iv is preserved.  */
4925       name_info (data, cand->var_before)->preserve_biv = true;
4926       name_info (data, cand->var_after)->preserve_biv = true;
4927
4928       /* Rewrite the increment so that it uses var_before directly.  */
4929       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4930
4931       return;
4932     }
4933
4934   gimple_add_tmp_var (cand->var_before);
4935   add_referenced_var (cand->var_before);
4936
4937   base = unshare_expr (cand->iv->base);
4938
4939   create_iv (base, unshare_expr (cand->iv->step),
4940              cand->var_before, data->current_loop,
4941              &incr_pos, after, &cand->var_before, &cand->var_after);
4942 }
4943
4944 /* Creates new induction variables described in SET.  */
4945
4946 static void
4947 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4948 {
4949   unsigned i;
4950   struct iv_cand *cand;
4951   bitmap_iterator bi;
4952
4953   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4954     {
4955       cand = iv_cand (data, i);
4956       create_new_iv (data, cand);
4957     }
4958 }
4959
4960 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4961    is true, remove also the ssa name defined by the statement.  */
4962
4963 static void
4964 remove_statement (tree stmt, bool including_defined_name)
4965 {
4966   if (TREE_CODE (stmt) == PHI_NODE)
4967     {
4968       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4969     }
4970   else
4971     {
4972       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4973
4974       bsi_remove (&bsi, true);
4975       release_defs (stmt);
4976     }
4977 }
4978
4979 /* Rewrites USE (definition of iv used in a nonlinear expression)
4980    using candidate CAND.  */
4981
4982 static void
4983 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4984                             struct iv_use *use, struct iv_cand *cand)
4985 {
4986   tree comp;
4987   tree op, tgt, ass;
4988   block_stmt_iterator bsi;
4989
4990   /* An important special case -- if we are asked to express value of
4991      the original iv by itself, just exit; there is no need to
4992      introduce a new computation (that might also need casting the
4993      variable to unsigned and back).  */
4994   if (cand->pos == IP_ORIGINAL
4995       && cand->incremented_at == use->stmt)
4996     {
4997       tree step, ctype, utype;
4998       enum tree_code incr_code = PLUS_EXPR;
4999
5000       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
5001       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
5002
5003       step = cand->iv->step;
5004       ctype = TREE_TYPE (step);
5005       utype = TREE_TYPE (cand->var_after);
5006       if (TREE_CODE (step) == NEGATE_EXPR)
5007         {
5008           incr_code = MINUS_EXPR;
5009           step = TREE_OPERAND (step, 0);
5010         }
5011
5012       /* Check whether we may leave the computation unchanged.
5013          This is the case only if it does not rely on other
5014          computations in the loop -- otherwise, the computation
5015          we rely upon may be removed in remove_unused_ivs,
5016          thus leading to ICE.  */
5017       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
5018       if (TREE_CODE (op) == PLUS_EXPR
5019           || TREE_CODE (op) == MINUS_EXPR
5020           || TREE_CODE (op) == POINTER_PLUS_EXPR)
5021         {
5022           if (TREE_OPERAND (op, 0) == cand->var_before)
5023             op = TREE_OPERAND (op, 1);
5024           else if (TREE_CODE (op) != MINUS_EXPR
5025                    && TREE_OPERAND (op, 1) == cand->var_before)
5026             op = TREE_OPERAND (op, 0);
5027           else
5028             op = NULL_TREE;
5029         }
5030       else
5031         op = NULL_TREE;
5032
5033       if (op
5034           && (TREE_CODE (op) == INTEGER_CST
5035               || operand_equal_p (op, step, 0)))
5036         return;
5037
5038       /* Otherwise, add the necessary computations to express
5039          the iv.  */
5040       op = fold_convert (ctype, cand->var_before);
5041       comp = fold_convert (utype,
5042                            build2 (incr_code, ctype, op,
5043                                    unshare_expr (step)));
5044     }
5045   else
5046     {
5047       comp = get_computation (data->current_loop, use, cand);
5048       gcc_assert (comp != NULL_TREE);
5049     }
5050
5051   switch (TREE_CODE (use->stmt))
5052     {
5053     case PHI_NODE:
5054       tgt = PHI_RESULT (use->stmt);
5055
5056       /* If we should keep the biv, do not replace it.  */
5057       if (name_info (data, tgt)->preserve_biv)
5058         return;
5059
5060       bsi = bsi_after_labels (bb_for_stmt (use->stmt));
5061       break;
5062
5063     case GIMPLE_MODIFY_STMT:
5064       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
5065       bsi = bsi_for_stmt (use->stmt);
5066       break;
5067
5068     default:
5069       gcc_unreachable ();
5070     }
5071
5072   op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5073                                  true, BSI_SAME_STMT);
5074
5075   if (TREE_CODE (use->stmt) == PHI_NODE)
5076     {
5077       ass = build_gimple_modify_stmt (tgt, op);
5078       bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
5079       remove_statement (use->stmt, false);
5080       SSA_NAME_DEF_STMT (tgt) = ass;
5081     }
5082   else
5083     GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
5084 }
5085
5086 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5087    for_each_index.  */
5088
5089 static bool
5090 idx_remove_ssa_names (tree base, tree *idx,
5091                       void *data ATTRIBUTE_UNUSED)
5092 {
5093   tree *op;
5094
5095   if (TREE_CODE (*idx) == SSA_NAME)
5096     *idx = SSA_NAME_VAR (*idx);
5097
5098   if (TREE_CODE (base) == ARRAY_REF)
5099     {
5100       op = &TREE_OPERAND (base, 2);
5101       if (*op
5102           && TREE_CODE (*op) == SSA_NAME)
5103         *op = SSA_NAME_VAR (*op);
5104       op = &TREE_OPERAND (base, 3);
5105       if (*op
5106           && TREE_CODE (*op) == SSA_NAME)
5107         *op = SSA_NAME_VAR (*op);
5108     }
5109
5110   return true;
5111 }
5112
5113 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5114
5115 static tree
5116 unshare_and_remove_ssa_names (tree ref)
5117 {
5118   ref = unshare_expr (ref);
5119   for_each_index (&ref, idx_remove_ssa_names, NULL);
5120
5121   return ref;
5122 }
5123
5124 /* Extract the alias analysis info for the memory reference REF.  There are
5125    several ways how this information may be stored and what precisely is
5126    its semantics depending on the type of the reference, but there always is
5127    somewhere hidden one _DECL node that is used to determine the set of
5128    virtual operands for the reference.  The code below deciphers this jungle
5129    and extracts this single useful piece of information.  */
5130
5131 static tree
5132 get_ref_tag (tree ref, tree orig)
5133 {
5134   tree var = get_base_address (ref);
5135   tree aref = NULL_TREE, tag, sv;
5136   HOST_WIDE_INT offset, size, maxsize;
5137
5138   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5139     {
5140       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5141       if (ref)
5142         break;
5143     }
5144
5145   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5146     return aref;
5147
5148   if (!var)
5149     return NULL_TREE;
5150
5151   if (TREE_CODE (var) == INDIRECT_REF)
5152     {
5153       /* If the base is a dereference of a pointer, first check its name memory
5154          tag.  If it does not have one, use its symbol memory tag.  */
5155       var = TREE_OPERAND (var, 0);
5156       if (TREE_CODE (var) != SSA_NAME)
5157         return NULL_TREE;
5158
5159       if (SSA_NAME_PTR_INFO (var))
5160         {
5161           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5162           if (tag)
5163             return tag;
5164         }
5165
5166       var = SSA_NAME_VAR (var);
5167       tag = symbol_mem_tag (var);
5168       gcc_assert (tag != NULL_TREE);
5169       return tag;
5170     }
5171   else
5172     {
5173       if (!DECL_P (var))
5174         return NULL_TREE;
5175
5176       tag = symbol_mem_tag (var);
5177       if (tag)
5178         return tag;
5179
5180       return var;
5181     }
5182 }
5183
5184 /* Copies the reference information from OLD_REF to NEW_REF.  */
5185
5186 static void
5187 copy_ref_info (tree new_ref, tree old_ref)
5188 {
5189   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5190     copy_mem_ref_info (new_ref, old_ref);
5191   else
5192     {
5193       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5194       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5195     }
5196 }
5197
5198 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5199
5200 static void
5201 rewrite_use_address (struct ivopts_data *data,
5202                      struct iv_use *use, struct iv_cand *cand)
5203 {
5204   aff_tree aff;
5205   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5206   tree ref;
5207   bool ok;
5208
5209   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5210   gcc_assert (ok);
5211   unshare_aff_combination (&aff);
5212
5213   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5214   copy_ref_info (ref, *use->op_p);
5215   *use->op_p = ref;
5216 }
5217
5218 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5219    candidate CAND.  */
5220
5221 static void
5222 rewrite_use_compare (struct ivopts_data *data,
5223                      struct iv_use *use, struct iv_cand *cand)
5224 {
5225   tree comp, *var_p, op, bound;
5226   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5227   enum tree_code compare;
5228   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5229   bool ok;
5230
5231   bound = cp->value;
5232   if (bound)
5233     {
5234       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5235       tree var_type = TREE_TYPE (var);
5236
5237       compare = iv_elimination_compare (data, use);
5238       bound = unshare_expr (fold_convert (var_type, bound));
5239       op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
5240                                      true, BSI_SAME_STMT);
5241
5242       *use->op_p = build2 (compare, boolean_type_node, var, op);
5243       return;
5244     }
5245
5246   /* The induction variable elimination failed; just express the original
5247      giv.  */
5248   comp = get_computation (data->current_loop, use, cand);
5249   gcc_assert (comp != NULL_TREE);
5250
5251   ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5252   gcc_assert (ok);
5253
5254   *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5255                                      true, BSI_SAME_STMT);
5256 }
5257
5258 /* Rewrites USE using candidate CAND.  */
5259
5260 static void
5261 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5262 {
5263   push_stmt_changes (&use->stmt);
5264
5265   switch (use->type)
5266     {
5267       case USE_NONLINEAR_EXPR:
5268         rewrite_use_nonlinear_expr (data, use, cand);
5269         break;
5270
5271       case USE_ADDRESS:
5272         rewrite_use_address (data, use, cand);
5273         break;
5274
5275       case USE_COMPARE:
5276         rewrite_use_compare (data, use, cand);
5277         break;
5278
5279       default:
5280         gcc_unreachable ();
5281     }
5282
5283   pop_stmt_changes (&use->stmt);
5284 }
5285
5286 /* Rewrite the uses using the selected induction variables.  */
5287
5288 static void
5289 rewrite_uses (struct ivopts_data *data)
5290 {
5291   unsigned i;
5292   struct iv_cand *cand;
5293   struct iv_use *use;
5294
5295   for (i = 0; i < n_iv_uses (data); i++)
5296     {
5297       use = iv_use (data, i);
5298       cand = use->selected;
5299       gcc_assert (cand);
5300
5301       rewrite_use (data, use, cand);
5302     }
5303 }
5304
5305 /* Removes the ivs that are not used after rewriting.  */
5306
5307 static void
5308 remove_unused_ivs (struct ivopts_data *data)
5309 {
5310   unsigned j;
5311   bitmap_iterator bi;
5312
5313   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5314     {
5315       struct version_info *info;
5316
5317       info = ver_info (data, j);
5318       if (info->iv
5319           && !integer_zerop (info->iv->step)
5320           && !info->inv_id
5321           && !info->iv->have_use_for
5322           && !info->preserve_biv)
5323         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5324     }
5325 }
5326
5327 /* Frees data allocated by the optimization of a single loop.  */
5328
5329 static void
5330 free_loop_data (struct ivopts_data *data)
5331 {
5332   unsigned i, j;
5333   bitmap_iterator bi;
5334   tree obj;
5335
5336   if (data->niters)
5337     {
5338       pointer_map_destroy (data->niters);
5339       data->niters = NULL;
5340     }
5341
5342   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5343     {
5344       struct version_info *info;
5345
5346       info = ver_info (data, i);
5347       if (info->iv)
5348         free (info->iv);
5349       info->iv = NULL;
5350       info->has_nonlin_use = false;
5351       info->preserve_biv = false;
5352       info->inv_id = 0;
5353     }
5354   bitmap_clear (data->relevant);
5355   bitmap_clear (data->important_candidates);
5356
5357   for (i = 0; i < n_iv_uses (data); i++)
5358     {
5359       struct iv_use *use = iv_use (data, i);
5360
5361       free (use->iv);
5362       BITMAP_FREE (use->related_cands);
5363       for (j = 0; j < use->n_map_members; j++)
5364         if (use->cost_map[j].depends_on)
5365           BITMAP_FREE (use->cost_map[j].depends_on);
5366       free (use->cost_map);
5367       free (use);
5368     }
5369   VEC_truncate (iv_use_p, data->iv_uses, 0);
5370
5371   for (i = 0; i < n_iv_cands (data); i++)
5372     {
5373       struct iv_cand *cand = iv_cand (data, i);
5374
5375       if (cand->iv)
5376         free (cand->iv);
5377       if (cand->depends_on)
5378         BITMAP_FREE (cand->depends_on);
5379       free (cand);
5380     }
5381   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5382
5383   if (data->version_info_size < num_ssa_names)
5384     {
5385       data->version_info_size = 2 * num_ssa_names;
5386       free (data->version_info);
5387       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5388     }
5389
5390   data->max_inv_id = 0;
5391
5392   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5393     SET_DECL_RTL (obj, NULL_RTX);
5394
5395   VEC_truncate (tree, decl_rtl_to_reset, 0);
5396 }
5397
5398 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5399    loop tree.  */
5400
5401 static void
5402 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5403 {
5404   free_loop_data (data);
5405   free (data->version_info);
5406   BITMAP_FREE (data->relevant);
5407   BITMAP_FREE (data->important_candidates);
5408
5409   VEC_free (tree, heap, decl_rtl_to_reset);
5410   VEC_free (iv_use_p, heap, data->iv_uses);
5411   VEC_free (iv_cand_p, heap, data->iv_candidates);
5412 }
5413
5414 /* Optimizes the LOOP.  Returns true if anything changed.  */
5415
5416 static bool
5417 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5418 {
5419   bool changed = false;
5420   struct iv_ca *iv_ca;
5421   edge exit;
5422
5423   gcc_assert (!data->niters);
5424   data->current_loop = loop;
5425
5426   if (dump_file && (dump_flags & TDF_DETAILS))
5427     {
5428       fprintf (dump_file, "Processing loop %d\n", loop->num);
5429
5430       exit = single_dom_exit (loop);
5431       if (exit)
5432         {
5433           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5434                    exit->src->index, exit->dest->index);
5435           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5436           fprintf (dump_file, "\n");
5437         }
5438
5439       fprintf (dump_file, "\n");
5440     }
5441
5442   /* For each ssa name determines whether it behaves as an induction variable
5443      in some loop.  */
5444   if (!find_induction_variables (data))
5445     goto finish;
5446
5447   /* Finds interesting uses (item 1).  */
5448   find_interesting_uses (data);
5449   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5450     goto finish;
5451
5452   /* Finds candidates for the induction variables (item 2).  */
5453   find_iv_candidates (data);
5454
5455   /* Calculates the costs (item 3, part 1).  */
5456   determine_use_iv_costs (data);
5457   determine_iv_costs (data);
5458   determine_set_costs (data);
5459
5460   /* Find the optimal set of induction variables (item 3, part 2).  */
5461   iv_ca = find_optimal_iv_set (data);
5462   if (!iv_ca)
5463     goto finish;
5464   changed = true;
5465
5466   /* Create the new induction variables (item 4, part 1).  */
5467   create_new_ivs (data, iv_ca);
5468   iv_ca_free (&iv_ca);
5469
5470   /* Rewrite the uses (item 4, part 2).  */
5471   rewrite_uses (data);
5472
5473   /* Remove the ivs that are unused after rewriting.  */
5474   remove_unused_ivs (data);
5475
5476   /* We have changed the structure of induction variables; it might happen
5477      that definitions in the scev database refer to some of them that were
5478      eliminated.  */
5479   scev_reset ();
5480
5481 finish:
5482   free_loop_data (data);
5483
5484   return changed;
5485 }
5486
5487 /* Main entry point.  Optimizes induction variables in loops.  */
5488
5489 void
5490 tree_ssa_iv_optimize (void)
5491 {
5492   struct loop *loop;
5493   struct ivopts_data data;
5494   loop_iterator li;
5495
5496   tree_ssa_iv_optimize_init (&data);
5497
5498   /* Optimize the loops starting with the innermost ones.  */
5499   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5500     {
5501       if (dump_file && (dump_flags & TDF_DETAILS))
5502         flow_loop_dump (loop, dump_file, NULL, 1);
5503
5504       tree_ssa_iv_optimize_loop (&data, loop);
5505     }
5506
5507   tree_ssa_iv_optimize_finalize (&data);
5508 }