gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "rtl.h"
  70 #include "tm_p.h"
  71 #include "hard-reg-set.h"
  72 #include "basic-block.h"
  73 #include "output.h"
  74 #include "diagnostic.h"
  75 #include "tree-flow.h"
  76 #include "tree-dump.h"
  77 #include "timevar.h"
  78 #include "cfgloop.h"
  79 #include "varray.h"
  80 #include "expr.h"
  81 #include "tree-pass.h"
  82 #include "ggc.h"
  83 #include "insn-config.h"
  84 #include "recog.h"
  85 #include "pointer-set.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92 #include "tree-affine.h"
  93 #include "target.h"
  94
  95 /* The infinite cost.  */
  96 #define INFTY 10000000
  97
  98 /* The expected number of loop iterations.  TODO -- use profiling instead of
  99    this.  */
 100 #define AVG_LOOP_NITER(LOOP) 5
 101
 102
 103 /* Representation of the induction variable.  */
 104 struct iv
 105 {
 106   tree base;            /* Initial value of the iv.  */
 107   tree base_object;     /* A memory object to that the induction variable points.  */
 108   tree step;            /* Step of the iv (constant only).  */
 109   tree ssa_name;        /* The ssa name with the value.  */
 110   bool biv_p;           /* Is it a biv?  */
 111   bool have_use_for;    /* Do we already have a use for it?  */
 112   unsigned use_id;      /* The identifier in the use if it is the case.  */
 113 };
 114
 115 /* Per-ssa version information (induction variable descriptions, etc.).  */
 116 struct version_info
 117 {
 118   tree name;            /* The ssa name.  */
 119   struct iv *iv;        /* Induction variable description.  */
 120   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 121                            an expression that is not an induction variable.  */
 122   unsigned inv_id;      /* Id of an invariant.  */
 123   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 124 };
 125
 126 /* Types of uses.  */
 127 enum use_type
 128 {
 129   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 130   USE_ADDRESS,          /* Use in an address.  */
 131   USE_COMPARE           /* Use is a compare.  */
 132 };
 133
 134 /* Cost of a computation.  */
 135 typedef struct
 136 {
 137   unsigned cost;        /* The runtime cost.  */
 138   unsigned complexity;  /* The estimate of the complexity of the code for
 139                            the computation (in no concrete units --
 140                            complexity field should be larger for more
 141                            complex expressions and addressing modes).  */
 142 } comp_cost;
 143
 144 static const comp_cost zero_cost = {0, 0};
 145 static const comp_cost infinite_cost = {INFTY, INFTY};
 146
 147 /* The candidate - cost pair.  */
 148 struct cost_pair
 149 {
 150   struct iv_cand *cand; /* The candidate.  */
 151   comp_cost cost;       /* The cost.  */
 152   bitmap depends_on;    /* The list of invariants that have to be
 153                            preserved.  */
 154   tree value;           /* For final value elimination, the expression for
 155                            the final value of the iv.  For iv elimination,
 156                            the new bound to compare with.  */
 157 };
 158
 159 /* Use.  */
 160 struct iv_use
 161 {
 162   unsigned id;          /* The id of the use.  */
 163   enum use_type type;   /* Type of the use.  */
 164   struct iv *iv;        /* The induction variable it is based on.  */
 165   tree stmt;            /* Statement in that it occurs.  */
 166   tree *op_p;           /* The place where it occurs.  */
 167   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 168                            important ones.  */
 169
 170   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 171   struct cost_pair *cost_map;
 172                         /* The costs wrto the iv candidates.  */
 173
 174   struct iv_cand *selected;
 175                         /* The selected candidate.  */
 176 };
 177
 178 /* The position where the iv is computed.  */
 179 enum iv_position
 180 {
 181   IP_NORMAL,            /* At the end, just before the exit condition.  */
 182   IP_END,               /* At the end of the latch block.  */
 183   IP_ORIGINAL           /* The original biv.  */
 184 };
 185
 186 /* The induction variable candidate.  */
 187 struct iv_cand
 188 {
 189   unsigned id;          /* The number of the candidate.  */
 190   bool important;       /* Whether this is an "important" candidate, i.e. such
 191                            that it should be considered by all uses.  */
 192   enum iv_position pos; /* Where it is computed.  */
 193   tree incremented_at;  /* For original biv, the statement where it is
 194                            incremented.  */
 195   tree var_before;      /* The variable used for it before increment.  */
 196   tree var_after;       /* The variable used for it after increment.  */
 197   struct iv *iv;        /* The value of the candidate.  NULL for
 198                            "pseudocandidate" used to indicate the possibility
 199                            to replace the final value of an iv by direct
 200                            computation of the value.  */
 201   unsigned cost;        /* Cost of the candidate.  */
 202   bitmap depends_on;    /* The list of invariants that are used in step of the
 203                            biv.  */
 204 };
 205
 206 /* The data used by the induction variable optimizations.  */
 207
 208 typedef struct iv_use *iv_use_p;
 209 DEF_VEC_P(iv_use_p);
 210 DEF_VEC_ALLOC_P(iv_use_p,heap);
 211
 212 typedef struct iv_cand *iv_cand_p;
 213 DEF_VEC_P(iv_cand_p);
 214 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 215
 216 struct ivopts_data
 217 {
 218   /* The currently optimized loop.  */
 219   struct loop *current_loop;
 220
 221   /* Number of registers used in it.  */
 222   unsigned regs_used;
 223
 224   /* Numbers of iterations for all exits of the current loop.  */
 225   struct pointer_map_t *niters;
 226
 227   /* The size of version_info array allocated.  */
 228   unsigned version_info_size;
 229
 230   /* The array of information for the ssa names.  */
 231   struct version_info *version_info;
 232
 233   /* The bitmap of indices in version_info whose value was changed.  */
 234   bitmap relevant;
 235
 236   /* The maximum invariant id.  */
 237   unsigned max_inv_id;
 238
 239   /* The uses of induction variables.  */
 240   VEC(iv_use_p,heap) *iv_uses;
 241
 242   /* The candidates.  */
 243   VEC(iv_cand_p,heap) *iv_candidates;
 244
 245   /* A bitmap of important candidates.  */
 246   bitmap important_candidates;
 247
 248   /* Whether to consider just related and important candidates when replacing a
 249      use.  */
 250   bool consider_all_candidates;
 251 };
 252
 253 /* An assignment of iv candidates to uses.  */
 254
 255 struct iv_ca
 256 {
 257   /* The number of uses covered by the assignment.  */
 258   unsigned upto;
 259
 260   /* Number of uses that cannot be expressed by the candidates in the set.  */
 261   unsigned bad_uses;
 262
 263   /* Candidate assigned to a use, together with the related costs.  */
 264   struct cost_pair **cand_for_use;
 265
 266   /* Number of times each candidate is used.  */
 267   unsigned *n_cand_uses;
 268
 269   /* The candidates used.  */
 270   bitmap cands;
 271
 272   /* The number of candidates in the set.  */
 273   unsigned n_cands;
 274
 275   /* Total number of registers needed.  */
 276   unsigned n_regs;
 277
 278   /* Total cost of expressing uses.  */
 279   comp_cost cand_use_cost;
 280
 281   /* Total cost of candidates.  */
 282   unsigned cand_cost;
 283
 284   /* Number of times each invariant is used.  */
 285   unsigned *n_invariant_uses;
 286
 287   /* Total cost of the assignment.  */
 288   comp_cost cost;
 289 };
 290
 291 /* Difference of two iv candidate assignments.  */
 292
 293 struct iv_ca_delta
 294 {
 295   /* Changed use.  */
 296   struct iv_use *use;
 297
 298   /* An old assignment (for rollback purposes).  */
 299   struct cost_pair *old_cp;
 300
 301   /* A new assignment.  */
 302   struct cost_pair *new_cp;
 303
 304   /* Next change in the list.  */
 305   struct iv_ca_delta *next_change;
 306 };
 307
 308 /* Bound on number of candidates below that all candidates are considered.  */
 309
 310 #define CONSIDER_ALL_CANDIDATES_BOUND \
 311   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 312
 313 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 314    optimizing such a loop would help, and it would take ages).  */
 315
 316 #define MAX_CONSIDERED_USES \
 317   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 318
 319 /* If there are at most this number of ivs in the set, try removing unnecessary
 320    ivs from the set always.  */
 321
 322 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 323   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 324
 325 /* The list of trees for that the decl_rtl field must be reset is stored
 326    here.  */
 327
 328 static VEC(tree,heap) *decl_rtl_to_reset;
 329
 330 /* Number of uses recorded in DATA.  */
 331
 332 static inline unsigned
 333 n_iv_uses (struct ivopts_data *data)
 334 {
 335   return VEC_length (iv_use_p, data->iv_uses);
 336 }
 337
 338 /* Ith use recorded in DATA.  */
 339
 340 static inline struct iv_use *
 341 iv_use (struct ivopts_data *data, unsigned i)
 342 {
 343   return VEC_index (iv_use_p, data->iv_uses, i);
 344 }
 345
 346 /* Number of candidates recorded in DATA.  */
 347
 348 static inline unsigned
 349 n_iv_cands (struct ivopts_data *data)
 350 {
 351   return VEC_length (iv_cand_p, data->iv_candidates);
 352 }
 353
 354 /* Ith candidate recorded in DATA.  */
 355
 356 static inline struct iv_cand *
 357 iv_cand (struct ivopts_data *data, unsigned i)
 358 {
 359   return VEC_index (iv_cand_p, data->iv_candidates, i);
 360 }
 361
 362 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 363
 364 edge
 365 single_dom_exit (struct loop *loop)
 366 {
 367   edge exit = single_exit (loop);
 368
 369   if (!exit)
 370     return NULL;
 371
 372   if (!just_once_each_iteration_p (loop, exit->src))
 373     return NULL;
 374
 375   return exit;
 376 }
 377
 378 /* Dumps information about the induction variable IV to FILE.  */
 379
 380 extern void dump_iv (FILE *, struct iv *);
 381 void
 382 dump_iv (FILE *file, struct iv *iv)
 383 {
 384   if (iv->ssa_name)
 385     {
 386       fprintf (file, "ssa name ");
 387       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 388       fprintf (file, "\n");
 389     }
 390
 391   fprintf (file, "  type ");
 392   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 393   fprintf (file, "\n");
 394
 395   if (iv->step)
 396     {
 397       fprintf (file, "  base ");
 398       print_generic_expr (file, iv->base, TDF_SLIM);
 399       fprintf (file, "\n");
 400
 401       fprintf (file, "  step ");
 402       print_generic_expr (file, iv->step, TDF_SLIM);
 403       fprintf (file, "\n");
 404     }
 405   else
 406     {
 407       fprintf (file, "  invariant ");
 408       print_generic_expr (file, iv->base, TDF_SLIM);
 409       fprintf (file, "\n");
 410     }
 411
 412   if (iv->base_object)
 413     {
 414       fprintf (file, "  base object ");
 415       print_generic_expr (file, iv->base_object, TDF_SLIM);
 416       fprintf (file, "\n");
 417     }
 418
 419   if (iv->biv_p)
 420     fprintf (file, "  is a biv\n");
 421 }
 422
 423 /* Dumps information about the USE to FILE.  */
 424
 425 extern void dump_use (FILE *, struct iv_use *);
 426 void
 427 dump_use (FILE *file, struct iv_use *use)
 428 {
 429   fprintf (file, "use %d\n", use->id);
 430
 431   switch (use->type)
 432     {
 433     case USE_NONLINEAR_EXPR:
 434       fprintf (file, "  generic\n");
 435       break;
 436
 437     case USE_ADDRESS:
 438       fprintf (file, "  address\n");
 439       break;
 440
 441     case USE_COMPARE:
 442       fprintf (file, "  compare\n");
 443       break;
 444
 445     default:
 446       gcc_unreachable ();
 447     }
 448
 449   fprintf (file, "  in statement ");
 450   print_generic_expr (file, use->stmt, TDF_SLIM);
 451   fprintf (file, "\n");
 452
 453   fprintf (file, "  at position ");
 454   if (use->op_p)
 455     print_generic_expr (file, *use->op_p, TDF_SLIM);
 456   fprintf (file, "\n");
 457
 458   dump_iv (file, use->iv);
 459
 460   if (use->related_cands)
 461     {
 462       fprintf (file, "  related candidates ");
 463       dump_bitmap (file, use->related_cands);
 464     }
 465 }
 466
 467 /* Dumps information about the uses to FILE.  */
 468
 469 extern void dump_uses (FILE *, struct ivopts_data *);
 470 void
 471 dump_uses (FILE *file, struct ivopts_data *data)
 472 {
 473   unsigned i;
 474   struct iv_use *use;
 475
 476   for (i = 0; i < n_iv_uses (data); i++)
 477     {
 478       use = iv_use (data, i);
 479
 480       dump_use (file, use);
 481       fprintf (file, "\n");
 482     }
 483 }
 484
 485 /* Dumps information about induction variable candidate CAND to FILE.  */
 486
 487 extern void dump_cand (FILE *, struct iv_cand *);
 488 void
 489 dump_cand (FILE *file, struct iv_cand *cand)
 490 {
 491   struct iv *iv = cand->iv;
 492
 493   fprintf (file, "candidate %d%s\n",
 494            cand->id, cand->important ? " (important)" : "");
 495
 496   if (cand->depends_on)
 497     {
 498       fprintf (file, "  depends on ");
 499       dump_bitmap (file, cand->depends_on);
 500     }
 501
 502   if (!iv)
 503     {
 504       fprintf (file, "  final value replacement\n");
 505       return;
 506     }
 507
 508   switch (cand->pos)
 509     {
 510     case IP_NORMAL:
 511       fprintf (file, "  incremented before exit test\n");
 512       break;
 513
 514     case IP_END:
 515       fprintf (file, "  incremented at end\n");
 516       break;
 517
 518     case IP_ORIGINAL:
 519       fprintf (file, "  original biv\n");
 520       break;
 521     }
 522
 523   dump_iv (file, iv);
 524 }
 525
 526 /* Returns the info for ssa version VER.  */
 527
 528 static inline struct version_info *
 529 ver_info (struct ivopts_data *data, unsigned ver)
 530 {
 531   return data->version_info + ver;
 532 }
 533
 534 /* Returns the info for ssa name NAME.  */
 535
 536 static inline struct version_info *
 537 name_info (struct ivopts_data *data, tree name)
 538 {
 539   return ver_info (data, SSA_NAME_VERSION (name));
 540 }
 541
 542 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 543    emitted in LOOP.  */
 544
 545 static bool
 546 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 547 {
 548   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 549
 550   gcc_assert (bb);
 551
 552   if (sbb == loop->latch)
 553     return true;
 554
 555   if (sbb != bb)
 556     return false;
 557
 558   return stmt == last_stmt (bb);
 559 }
 560
 561 /* Returns true if STMT if after the place where the original induction
 562    variable CAND is incremented.  */
 563
 564 static bool
 565 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 566 {
 567   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 568   basic_block stmt_bb = bb_for_stmt (stmt);
 569   block_stmt_iterator bsi;
 570
 571   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 572     return false;
 573
 574   if (stmt_bb != cand_bb)
 575     return true;
 576
 577   /* Scan the block from the end, since the original ivs are usually
 578      incremented at the end of the loop body.  */
 579   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 580     {
 581       if (bsi_stmt (bsi) == cand->incremented_at)
 582         return false;
 583       if (bsi_stmt (bsi) == stmt)
 584         return true;
 585     }
 586 }
 587
 588 /* Returns true if STMT if after the place where the induction variable
 589    CAND is incremented in LOOP.  */
 590
 591 static bool
 592 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 593 {
 594   switch (cand->pos)
 595     {
 596     case IP_END:
 597       return false;
 598
 599     case IP_NORMAL:
 600       return stmt_after_ip_normal_pos (loop, stmt);
 601
 602     case IP_ORIGINAL:
 603       return stmt_after_ip_original_pos (cand, stmt);
 604
 605     default:
 606       gcc_unreachable ();
 607     }
 608 }
 609
 610 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 611
 612 static bool
 613 abnormal_ssa_name_p (tree exp)
 614 {
 615   if (!exp)
 616     return false;
 617
 618   if (TREE_CODE (exp) != SSA_NAME)
 619     return false;
 620
 621   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 622 }
 623
 624 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 625    abnormal phi node.  Callback for for_each_index.  */
 626
 627 static bool
 628 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 629                                   void *data ATTRIBUTE_UNUSED)
 630 {
 631   if (TREE_CODE (base) == ARRAY_REF)
 632     {
 633       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 634         return false;
 635       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 636         return false;
 637     }
 638
 639   return !abnormal_ssa_name_p (*index);
 640 }
 641
 642 /* Returns true if EXPR contains a ssa name that occurs in an
 643    abnormal phi node.  */
 644
 645 bool
 646 contains_abnormal_ssa_name_p (tree expr)
 647 {
 648   enum tree_code code;
 649   enum tree_code_class codeclass;
 650
 651   if (!expr)
 652     return false;
 653
 654   code = TREE_CODE (expr);
 655   codeclass = TREE_CODE_CLASS (code);
 656
 657   if (code == SSA_NAME)
 658     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 659
 660   if (code == INTEGER_CST
 661       || is_gimple_min_invariant (expr))
 662     return false;
 663
 664   if (code == ADDR_EXPR)
 665     return !for_each_index (&TREE_OPERAND (expr, 0),
 666                             idx_contains_abnormal_ssa_name_p,
 667                             NULL);
 668
 669   switch (codeclass)
 670     {
 671     case tcc_binary:
 672     case tcc_comparison:
 673       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 674         return true;
 675
 676       /* Fallthru.  */
 677     case tcc_unary:
 678       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 679         return true;
 680
 681       break;
 682
 683     default:
 684       gcc_unreachable ();
 685     }
 686
 687   return false;
 688 }
 689
 690 /*  Returns tree describing number of iterations determined from
 691     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 692
 693 static tree
 694 niter_for_exit (struct ivopts_data *data, edge exit)
 695 {
 696   struct tree_niter_desc desc;
 697   tree niter;
 698   void **slot;
 699
 700   if (!data->niters)
 701     {
 702       data->niters = pointer_map_create ();
 703       slot = NULL;
 704     }
 705   else
 706     slot = pointer_map_contains (data->niters, exit);
 707
 708   if (!slot)
 709     {
 710       /* Try to determine number of iterations.  We must know it
 711          unconditionally (i.e., without possibility of # of iterations
 712          being zero).  Also, we cannot safely work with ssa names that
 713          appear in phi nodes on abnormal edges, so that we do not create
 714          overlapping life ranges for them (PR 27283).  */
 715       if (number_of_iterations_exit (data->current_loop,
 716                                      exit, &desc, true)
 717           && integer_zerop (desc.may_be_zero)
 718           && !contains_abnormal_ssa_name_p (desc.niter))
 719         niter = desc.niter;
 720       else
 721         niter = NULL_TREE;
 722
 723       *pointer_map_insert (data->niters, exit) = niter;
 724     }
 725   else
 726     niter = (tree) *slot;
 727
 728   return niter;
 729 }
 730
 731 /* Returns tree describing number of iterations determined from
 732    single dominating exit of DATA->current_loop, or NULL if something
 733    goes wrong.  */
 734
 735 static tree
 736 niter_for_single_dom_exit (struct ivopts_data *data)
 737 {
 738   edge exit = single_dom_exit (data->current_loop);
 739
 740   if (!exit)
 741     return NULL;
 742
 743   return niter_for_exit (data, exit);
 744 }
 745
 746 /* Initializes data structures used by the iv optimization pass, stored
 747    in DATA.  */
 748
 749 static void
 750 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 751 {
 752   data->version_info_size = 2 * num_ssa_names;
 753   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 754   data->relevant = BITMAP_ALLOC (NULL);
 755   data->important_candidates = BITMAP_ALLOC (NULL);
 756   data->max_inv_id = 0;
 757   data->niters = NULL;
 758   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 759   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 760   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 761 }
 762
 763 /* Returns a memory object to that EXPR points.  In case we are able to
 764    determine that it does not point to any such object, NULL is returned.  */
 765
 766 static tree
 767 determine_base_object (tree expr)
 768 {
 769   enum tree_code code = TREE_CODE (expr);
 770   tree base, obj;
 771
 772   /* If this is a pointer casted to any type, we need to determine
 773      the base object for the pointer; so handle conversions before
 774      throwing away non-pointer expressions.  */
 775   if (TREE_CODE (expr) == NOP_EXPR
 776       || TREE_CODE (expr) == CONVERT_EXPR)
 777     return determine_base_object (TREE_OPERAND (expr, 0));
 778
 779   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 780     return NULL_TREE;
 781
 782   switch (code)
 783     {
 784     case INTEGER_CST:
 785       return NULL_TREE;
 786
 787     case ADDR_EXPR:
 788       obj = TREE_OPERAND (expr, 0);
 789       base = get_base_address (obj);
 790
 791       if (!base)
 792         return expr;
 793
 794       if (TREE_CODE (base) == INDIRECT_REF)
 795         return determine_base_object (TREE_OPERAND (base, 0));
 796
 797       return fold_convert (ptr_type_node,
 798                            build_fold_addr_expr (base));
 799
 800     case POINTER_PLUS_EXPR:
 801       return determine_base_object (TREE_OPERAND (expr, 0));
 802
 803     case PLUS_EXPR:
 804     case MINUS_EXPR:
 805       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 806       gcc_unreachable ();
 807
 808     default:
 809       return fold_convert (ptr_type_node, expr);
 810     }
 811 }
 812
 813 /* Allocates an induction variable with given initial value BASE and step STEP
 814    for loop LOOP.  */
 815
 816 static struct iv *
 817 alloc_iv (tree base, tree step)
 818 {
 819   struct iv *iv = XCNEW (struct iv);
 820   gcc_assert (step != NULL_TREE);
 821
 822   iv->base = base;
 823   iv->base_object = determine_base_object (base);
 824   iv->step = step;
 825   iv->biv_p = false;
 826   iv->have_use_for = false;
 827   iv->use_id = 0;
 828   iv->ssa_name = NULL_TREE;
 829
 830   return iv;
 831 }
 832
 833 /* Sets STEP and BASE for induction variable IV.  */
 834
 835 static void
 836 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 837 {
 838   struct version_info *info = name_info (data, iv);
 839
 840   gcc_assert (!info->iv);
 841
 842   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 843   info->iv = alloc_iv (base, step);
 844   info->iv->ssa_name = iv;
 845 }
 846
 847 /* Finds induction variable declaration for VAR.  */
 848
 849 static struct iv *
 850 get_iv (struct ivopts_data *data, tree var)
 851 {
 852   basic_block bb;
 853   tree type = TREE_TYPE (var);
 854
 855   if (!POINTER_TYPE_P (type)
 856       && !INTEGRAL_TYPE_P (type))
 857     return NULL;
 858
 859   if (!name_info (data, var)->iv)
 860     {
 861       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 862
 863       if (!bb
 864           || !flow_bb_inside_loop_p (data->current_loop, bb))
 865         set_iv (data, var, var, build_int_cst (type, 0));
 866     }
 867
 868   return name_info (data, var)->iv;
 869 }
 870
 871 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 872    not define a simple affine biv with nonzero step.  */
 873
 874 static tree
 875 determine_biv_step (tree phi)
 876 {
 877   struct loop *loop = bb_for_stmt (phi)->loop_father;
 878   tree name = PHI_RESULT (phi);
 879   affine_iv iv;
 880
 881   if (!is_gimple_reg (name))
 882     return NULL_TREE;
 883
 884   if (!simple_iv (loop, phi, name, &iv, true))
 885     return NULL_TREE;
 886
 887   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 888 }
 889
 890 /* Finds basic ivs.  */
 891
 892 static bool
 893 find_bivs (struct ivopts_data *data)
 894 {
 895   tree phi, step, type, base;
 896   bool found = false;
 897   struct loop *loop = data->current_loop;
 898
 899   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 900     {
 901       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 902         continue;
 903
 904       step = determine_biv_step (phi);
 905       if (!step)
 906         continue;
 907
 908       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 909       base = expand_simple_operations (base);
 910       if (contains_abnormal_ssa_name_p (base)
 911           || contains_abnormal_ssa_name_p (step))
 912         continue;
 913
 914       type = TREE_TYPE (PHI_RESULT (phi));
 915       base = fold_convert (type, base);
 916       if (step)
 917         {
 918           if (POINTER_TYPE_P (type))
 919             step = fold_convert (sizetype, step);
 920           else
 921             step = fold_convert (type, step);
 922         }
 923
 924       set_iv (data, PHI_RESULT (phi), base, step);
 925       found = true;
 926     }
 927
 928   return found;
 929 }
 930
 931 /* Marks basic ivs.  */
 932
 933 static void
 934 mark_bivs (struct ivopts_data *data)
 935 {
 936   tree phi, var;
 937   struct iv *iv, *incr_iv;
 938   struct loop *loop = data->current_loop;
 939   basic_block incr_bb;
 940
 941   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 942     {
 943       iv = get_iv (data, PHI_RESULT (phi));
 944       if (!iv)
 945         continue;
 946
 947       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 948       incr_iv = get_iv (data, var);
 949       if (!incr_iv)
 950         continue;
 951
 952       /* If the increment is in the subloop, ignore it.  */
 953       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 954       if (incr_bb->loop_father != data->current_loop
 955           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 956         continue;
 957
 958       iv->biv_p = true;
 959       incr_iv->biv_p = true;
 960     }
 961 }
 962
 963 /* Checks whether STMT defines a linear induction variable and stores its
 964    parameters to IV.  */
 965
 966 static bool
 967 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 968 {
 969   tree lhs;
 970   struct loop *loop = data->current_loop;
 971
 972   iv->base = NULL_TREE;
 973   iv->step = NULL_TREE;
 974
 975   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 976     return false;
 977
 978   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 979   if (TREE_CODE (lhs) != SSA_NAME)
 980     return false;
 981
 982   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
 983     return false;
 984   iv->base = expand_simple_operations (iv->base);
 985
 986   if (contains_abnormal_ssa_name_p (iv->base)
 987       || contains_abnormal_ssa_name_p (iv->step))
 988     return false;
 989
 990   return true;
 991 }
 992
 993 /* Finds general ivs in statement STMT.  */
 994
 995 static void
 996 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
 997 {
 998   affine_iv iv;
 999
1000   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1001     return;
1002
1003   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1004 }
1005
1006 /* Finds general ivs in basic block BB.  */
1007
1008 static void
1009 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1010 {
1011   block_stmt_iterator bsi;
1012
1013   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1014     find_givs_in_stmt (data, bsi_stmt (bsi));
1015 }
1016
1017 /* Finds general ivs.  */
1018
1019 static void
1020 find_givs (struct ivopts_data *data)
1021 {
1022   struct loop *loop = data->current_loop;
1023   basic_block *body = get_loop_body_in_dom_order (loop);
1024   unsigned i;
1025
1026   for (i = 0; i < loop->num_nodes; i++)
1027     find_givs_in_bb (data, body[i]);
1028   free (body);
1029 }
1030
1031 /* For each ssa name defined in LOOP determines whether it is an induction
1032    variable and if so, its initial value and step.  */
1033
1034 static bool
1035 find_induction_variables (struct ivopts_data *data)
1036 {
1037   unsigned i;
1038   bitmap_iterator bi;
1039
1040   if (!find_bivs (data))
1041     return false;
1042
1043   find_givs (data);
1044   mark_bivs (data);
1045
1046   if (dump_file && (dump_flags & TDF_DETAILS))
1047     {
1048       tree niter = niter_for_single_dom_exit (data);
1049
1050       if (niter)
1051         {
1052           fprintf (dump_file, "  number of iterations ");
1053           print_generic_expr (dump_file, niter, TDF_SLIM);
1054           fprintf (dump_file, "\n\n");
1055         };
1056
1057       fprintf (dump_file, "Induction variables:\n\n");
1058
1059       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1060         {
1061           if (ver_info (data, i)->iv)
1062             dump_iv (dump_file, ver_info (data, i)->iv);
1063         }
1064     }
1065
1066   return true;
1067 }
1068
1069 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1070
1071 static struct iv_use *
1072 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1073             tree stmt, enum use_type use_type)
1074 {
1075   struct iv_use *use = XCNEW (struct iv_use);
1076
1077   use->id = n_iv_uses (data);
1078   use->type = use_type;
1079   use->iv = iv;
1080   use->stmt = stmt;
1081   use->op_p = use_p;
1082   use->related_cands = BITMAP_ALLOC (NULL);
1083
1084   /* To avoid showing ssa name in the dumps, if it was not reset by the
1085      caller.  */
1086   iv->ssa_name = NULL_TREE;
1087
1088   if (dump_file && (dump_flags & TDF_DETAILS))
1089     dump_use (dump_file, use);
1090
1091   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1092
1093   return use;
1094 }
1095
1096 /* Checks whether OP is a loop-level invariant and if so, records it.
1097    NONLINEAR_USE is true if the invariant is used in a way we do not
1098    handle specially.  */
1099
1100 static void
1101 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1102 {
1103   basic_block bb;
1104   struct version_info *info;
1105
1106   if (TREE_CODE (op) != SSA_NAME
1107       || !is_gimple_reg (op))
1108     return;
1109
1110   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1111   if (bb
1112       && flow_bb_inside_loop_p (data->current_loop, bb))
1113     return;
1114
1115   info = name_info (data, op);
1116   info->name = op;
1117   info->has_nonlin_use |= nonlinear_use;
1118   if (!info->inv_id)
1119     info->inv_id = ++data->max_inv_id;
1120   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1121 }
1122
1123 /* Checks whether the use OP is interesting and if so, records it.  */
1124
1125 static struct iv_use *
1126 find_interesting_uses_op (struct ivopts_data *data, tree op)
1127 {
1128   struct iv *iv;
1129   struct iv *civ;
1130   tree stmt;
1131   struct iv_use *use;
1132
1133   if (TREE_CODE (op) != SSA_NAME)
1134     return NULL;
1135
1136   iv = get_iv (data, op);
1137   if (!iv)
1138     return NULL;
1139
1140   if (iv->have_use_for)
1141     {
1142       use = iv_use (data, iv->use_id);
1143
1144       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1145       return use;
1146     }
1147
1148   if (integer_zerop (iv->step))
1149     {
1150       record_invariant (data, op, true);
1151       return NULL;
1152     }
1153   iv->have_use_for = true;
1154
1155   civ = XNEW (struct iv);
1156   *civ = *iv;
1157
1158   stmt = SSA_NAME_DEF_STMT (op);
1159   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1160               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1161
1162   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1163   iv->use_id = use->id;
1164
1165   return use;
1166 }
1167
1168 /* Given a condition *COND_P, checks whether it is a compare of an induction
1169    variable and an invariant.  If this is the case, CONTROL_VAR is set
1170    to location of the iv, BOUND to the location of the invariant,
1171    IV_VAR and IV_BOUND are set to the corresponding induction variable
1172    descriptions, and true is returned.  If this is not the case,
1173    CONTROL_VAR and BOUND are set to the arguments of the condition and
1174    false is returned.  */
1175
1176 static bool
1177 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1178                        tree **control_var, tree **bound,
1179                        struct iv **iv_var, struct iv **iv_bound)
1180 {
1181   /* The nodes returned when COND has just one operand.  Note that you should
1182      not modify anything in BOUND or IV_BOUND because of this.  */
1183   static struct iv const_iv;
1184   static tree zero;
1185   tree cond = *cond_p;
1186   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1187   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1188   bool ret = false;
1189
1190   zero = integer_zero_node;
1191   const_iv.step = integer_zero_node;
1192
1193   if (TREE_CODE (cond) == SSA_NAME)
1194     {
1195       op0 = cond_p;
1196       iv0 = get_iv (data, cond);
1197       ret = (iv0 && !integer_zerop (iv0->step));
1198       goto end;
1199     }
1200
1201   if (!COMPARISON_CLASS_P (cond))
1202     {
1203       op0 = cond_p;
1204       goto end;
1205     }
1206
1207   op0 = &TREE_OPERAND (cond, 0);
1208   op1 = &TREE_OPERAND (cond, 1);
1209   if (TREE_CODE (*op0) == SSA_NAME)
1210     iv0 = get_iv (data, *op0);
1211   if (TREE_CODE (*op1) == SSA_NAME)
1212     iv1 = get_iv (data, *op1);
1213
1214   /* Exactly one of the compared values must be an iv, and the other one must
1215      be an invariant.  */
1216   if (!iv0 || !iv1)
1217     goto end;
1218
1219   if (integer_zerop (iv0->step))
1220     {
1221       /* Control variable may be on the other side.  */
1222       tmp_op = op0; op0 = op1; op1 = tmp_op;
1223       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1224     }
1225   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1226
1227 end:
1228   if (control_var)
1229     *control_var = op0;;
1230   if (iv_var)
1231     *iv_var = iv0;;
1232   if (bound)
1233     *bound = op1;
1234   if (iv_bound)
1235     *iv_bound = iv1;
1236
1237   return ret;
1238 }
1239
1240 /* Checks whether the condition *COND_P in STMT is interesting
1241    and if so, records it.  */
1242
1243 static void
1244 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1245 {
1246   tree *var_p, *bound_p;
1247   struct iv *var_iv, *civ;
1248
1249   if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1250     {
1251       find_interesting_uses_op (data, *var_p);
1252       find_interesting_uses_op (data, *bound_p);
1253       return;
1254     }
1255
1256   civ = XNEW (struct iv);
1257   *civ = *var_iv;
1258   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1259 }
1260
1261 /* Returns true if expression EXPR is obviously invariant in LOOP,
1262    i.e. if all its operands are defined outside of the LOOP.  LOOP
1263    should not be the function body.  */
1264
1265 bool
1266 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1267 {
1268   basic_block def_bb;
1269   unsigned i, len;
1270
1271   gcc_assert (loop_depth (loop) > 0);
1272
1273   if (is_gimple_min_invariant (expr))
1274     return true;
1275
1276   if (TREE_CODE (expr) == SSA_NAME)
1277     {
1278       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1279       if (def_bb
1280           && flow_bb_inside_loop_p (loop, def_bb))
1281         return false;
1282
1283       return true;
1284     }
1285
1286   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1287     return false;
1288
1289   len = TREE_OPERAND_LENGTH (expr);
1290   for (i = 0; i < len; i++)
1291     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1292       return false;
1293
1294   return true;
1295 }
1296
1297 /* Cumulates the steps of indices into DATA and replaces their values with the
1298    initial ones.  Returns false when the value of the index cannot be determined.
1299    Callback for for_each_index.  */
1300
1301 struct ifs_ivopts_data
1302 {
1303   struct ivopts_data *ivopts_data;
1304   tree stmt;
1305   tree step;
1306 };
1307
1308 static bool
1309 idx_find_step (tree base, tree *idx, void *data)
1310 {
1311   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1312   struct iv *iv;
1313   tree step, iv_base, iv_step, lbound, off;
1314   struct loop *loop = dta->ivopts_data->current_loop;
1315
1316   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1317       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1318     return false;
1319
1320   /* If base is a component ref, require that the offset of the reference
1321      be invariant.  */
1322   if (TREE_CODE (base) == COMPONENT_REF)
1323     {
1324       off = component_ref_field_offset (base);
1325       return expr_invariant_in_loop_p (loop, off);
1326     }
1327
1328   /* If base is array, first check whether we will be able to move the
1329      reference out of the loop (in order to take its address in strength
1330      reduction).  In order for this to work we need both lower bound
1331      and step to be loop invariants.  */
1332   if (TREE_CODE (base) == ARRAY_REF)
1333     {
1334       step = array_ref_element_size (base);
1335       lbound = array_ref_low_bound (base);
1336
1337       if (!expr_invariant_in_loop_p (loop, step)
1338           || !expr_invariant_in_loop_p (loop, lbound))
1339         return false;
1340     }
1341
1342   if (TREE_CODE (*idx) != SSA_NAME)
1343     return true;
1344
1345   iv = get_iv (dta->ivopts_data, *idx);
1346   if (!iv)
1347     return false;
1348
1349   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1350           *&x[0], which is not folded and does not trigger the
1351           ARRAY_REF path below.  */
1352   *idx = iv->base;
1353
1354   if (integer_zerop (iv->step))
1355     return true;
1356
1357   if (TREE_CODE (base) == ARRAY_REF)
1358     {
1359       step = array_ref_element_size (base);
1360
1361       /* We only handle addresses whose step is an integer constant.  */
1362       if (TREE_CODE (step) != INTEGER_CST)
1363         return false;
1364     }
1365   else
1366     /* The step for pointer arithmetics already is 1 byte.  */
1367     step = build_int_cst (sizetype, 1);
1368
1369   iv_base = iv->base;
1370   iv_step = iv->step;
1371   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1372                             sizetype, &iv_base, &iv_step, dta->stmt,
1373                             false))
1374     {
1375       /* The index might wrap.  */
1376       return false;
1377     }
1378
1379   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1380   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1381
1382   return true;
1383 }
1384
1385 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1386    object is passed to it in DATA.  */
1387
1388 static bool
1389 idx_record_use (tree base, tree *idx,
1390                 void *vdata)
1391 {
1392   struct ivopts_data *data = (struct ivopts_data *) vdata;
1393   find_interesting_uses_op (data, *idx);
1394   if (TREE_CODE (base) == ARRAY_REF)
1395     {
1396       find_interesting_uses_op (data, array_ref_element_size (base));
1397       find_interesting_uses_op (data, array_ref_low_bound (base));
1398     }
1399   return true;
1400 }
1401
1402 /* If we can prove that TOP = cst * BOT for some constant cst,
1403    store cst to MUL and return true.  Otherwise return false.
1404    The returned value is always sign-extended, regardless of the
1405    signedness of TOP and BOT.  */
1406
1407 static bool
1408 constant_multiple_of (tree top, tree bot, double_int *mul)
1409 {
1410   tree mby;
1411   enum tree_code code;
1412   double_int res, p0, p1;
1413   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1414
1415   STRIP_NOPS (top);
1416   STRIP_NOPS (bot);
1417
1418   if (operand_equal_p (top, bot, 0))
1419     {
1420       *mul = double_int_one;
1421       return true;
1422     }
1423
1424   code = TREE_CODE (top);
1425   switch (code)
1426     {
1427     case MULT_EXPR:
1428       mby = TREE_OPERAND (top, 1);
1429       if (TREE_CODE (mby) != INTEGER_CST)
1430         return false;
1431
1432       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1433         return false;
1434
1435       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1436                               precision);
1437       return true;
1438
1439     case PLUS_EXPR:
1440     case MINUS_EXPR:
1441       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1442           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1443         return false;
1444
1445       if (code == MINUS_EXPR)
1446         p1 = double_int_neg (p1);
1447       *mul = double_int_sext (double_int_add (p0, p1), precision);
1448       return true;
1449
1450     case INTEGER_CST:
1451       if (TREE_CODE (bot) != INTEGER_CST)
1452         return false;
1453
1454       p0 = double_int_sext (tree_to_double_int (top), precision);
1455       p1 = double_int_sext (tree_to_double_int (bot), precision);
1456       if (double_int_zero_p (p1))
1457         return false;
1458       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1459                               precision);
1460       return double_int_zero_p (res);
1461
1462     default:
1463       return false;
1464     }
1465 }
1466
1467 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1468
1469 static bool
1470 may_be_unaligned_p (tree ref, tree step)
1471 {
1472   tree base;
1473   tree base_type;
1474   HOST_WIDE_INT bitsize;
1475   HOST_WIDE_INT bitpos;
1476   tree toffset;
1477   enum machine_mode mode;
1478   int unsignedp, volatilep;
1479   unsigned base_align;
1480
1481   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1482      thus they are not misaligned.  */
1483   if (TREE_CODE (ref) == TARGET_MEM_REF)
1484     return false;
1485
1486   /* The test below is basically copy of what expr.c:normal_inner_ref
1487      does to check whether the object must be loaded by parts when
1488      STRICT_ALIGNMENT is true.  */
1489   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1490                               &unsignedp, &volatilep, true);
1491   base_type = TREE_TYPE (base);
1492   base_align = TYPE_ALIGN (base_type);
1493
1494   if (mode != BLKmode)
1495     {
1496       double_int mul;
1497       tree al = build_int_cst (TREE_TYPE (step),
1498                                GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1499
1500       if (base_align < GET_MODE_ALIGNMENT (mode)
1501           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1502           || bitpos % BITS_PER_UNIT != 0)
1503         return true;
1504
1505       if (!constant_multiple_of (step, al, &mul))
1506         return true;
1507     }
1508
1509   return false;
1510 }
1511
1512 /* Return true if EXPR may be non-addressable.   */
1513
1514 static bool
1515 may_be_nonaddressable_p (tree expr)
1516 {
1517   switch (TREE_CODE (expr))
1518     {
1519     case TARGET_MEM_REF:
1520       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1521          target, thus they are always addressable.  */
1522       return false;
1523
1524     case COMPONENT_REF:
1525       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1526              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1527
1528     case VIEW_CONVERT_EXPR:
1529       /* This kind of view-conversions may wrap non-addressable objects
1530          and make them look addressable.  After some processing the
1531          non-addressability may be uncovered again, causing ADDR_EXPRs
1532          of inappropriate objects to be built.  */
1533       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1534           || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
1535         return true;
1536
1537       /* ... fall through ... */
1538
1539     case ARRAY_REF:
1540     case ARRAY_RANGE_REF:
1541       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1542
1543     case CONVERT_EXPR:
1544     case NOP_EXPR:
1545       return true;
1546
1547     default:
1548       break;
1549     }
1550
1551   return false;
1552 }
1553
1554 /* Finds addresses in *OP_P inside STMT.  */
1555
1556 static void
1557 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1558 {
1559   tree base = *op_p, step = build_int_cst (sizetype, 0);
1560   struct iv *civ;
1561   struct ifs_ivopts_data ifs_ivopts_data;
1562
1563   /* Do not play with volatile memory references.  A bit too conservative,
1564      perhaps, but safe.  */
1565   if (stmt_ann (stmt)->has_volatile_ops)
1566     goto fail;
1567
1568   /* Ignore bitfields for now.  Not really something terribly complicated
1569      to handle.  TODO.  */
1570   if (TREE_CODE (base) == BIT_FIELD_REF)
1571     goto fail;
1572
1573   base = unshare_expr (base);
1574
1575   if (TREE_CODE (base) == TARGET_MEM_REF)
1576     {
1577       tree type = build_pointer_type (TREE_TYPE (base));
1578       tree astep;
1579
1580       if (TMR_BASE (base)
1581           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1582         {
1583           civ = get_iv (data, TMR_BASE (base));
1584           if (!civ)
1585             goto fail;
1586
1587           TMR_BASE (base) = civ->base;
1588           step = civ->step;
1589         }
1590       if (TMR_INDEX (base)
1591           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1592         {
1593           civ = get_iv (data, TMR_INDEX (base));
1594           if (!civ)
1595             goto fail;
1596
1597           TMR_INDEX (base) = civ->base;
1598           astep = civ->step;
1599
1600           if (astep)
1601             {
1602               if (TMR_STEP (base))
1603                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1604
1605               step = fold_build2 (PLUS_EXPR, type, step, astep);
1606             }
1607         }
1608
1609       if (integer_zerop (step))
1610         goto fail;
1611       base = tree_mem_ref_addr (type, base);
1612     }
1613   else
1614     {
1615       ifs_ivopts_data.ivopts_data = data;
1616       ifs_ivopts_data.stmt = stmt;
1617       ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1618       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1619           || integer_zerop (ifs_ivopts_data.step))
1620         goto fail;
1621       step = ifs_ivopts_data.step;
1622
1623       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1624       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1625
1626       /* Check that the base expression is addressable.  This needs
1627          to be done after substituting bases of IVs into it.  */
1628       if (may_be_nonaddressable_p (base))
1629         goto fail;
1630
1631       /* Moreover, on strict alignment platforms, check that it is
1632          sufficiently aligned.  */
1633       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1634         goto fail;
1635
1636       base = build_fold_addr_expr (base);
1637
1638       /* Substituting bases of IVs into the base expression might
1639          have caused folding opportunities.  */
1640       if (TREE_CODE (base) == ADDR_EXPR)
1641         {
1642           tree *ref = &TREE_OPERAND (base, 0);
1643           while (handled_component_p (*ref))
1644             ref = &TREE_OPERAND (*ref, 0);
1645           if (TREE_CODE (*ref) == INDIRECT_REF)
1646             *ref = fold_indirect_ref (*ref);
1647         }
1648     }
1649
1650   civ = alloc_iv (base, step);
1651   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1652   return;
1653
1654 fail:
1655   for_each_index (op_p, idx_record_use, data);
1656 }
1657
1658 /* Finds and records invariants used in STMT.  */
1659
1660 static void
1661 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1662 {
1663   ssa_op_iter iter;
1664   use_operand_p use_p;
1665   tree op;
1666
1667   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1668     {
1669       op = USE_FROM_PTR (use_p);
1670       record_invariant (data, op, false);
1671     }
1672 }
1673
1674 /* Finds interesting uses of induction variables in the statement STMT.  */
1675
1676 static void
1677 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1678 {
1679   struct iv *iv;
1680   tree op, lhs, rhs;
1681   ssa_op_iter iter;
1682   use_operand_p use_p;
1683
1684   find_invariants_stmt (data, stmt);
1685
1686   if (TREE_CODE (stmt) == COND_EXPR)
1687     {
1688       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1689       return;
1690     }
1691
1692   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1693     {
1694       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1695       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1696
1697       if (TREE_CODE (lhs) == SSA_NAME)
1698         {
1699           /* If the statement defines an induction variable, the uses are not
1700              interesting by themselves.  */
1701
1702           iv = get_iv (data, lhs);
1703
1704           if (iv && !integer_zerop (iv->step))
1705             return;
1706         }
1707
1708       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1709         {
1710         case tcc_comparison:
1711           find_interesting_uses_cond (data, stmt,
1712                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1713           return;
1714
1715         case tcc_reference:
1716           find_interesting_uses_address (data, stmt,
1717                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1718           if (REFERENCE_CLASS_P (lhs))
1719             find_interesting_uses_address (data, stmt,
1720                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1721           return;
1722
1723         default: ;
1724         }
1725
1726       if (REFERENCE_CLASS_P (lhs)
1727           && is_gimple_val (rhs))
1728         {
1729           find_interesting_uses_address (data, stmt,
1730                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1731           find_interesting_uses_op (data, rhs);
1732           return;
1733         }
1734
1735       /* TODO -- we should also handle address uses of type
1736
1737          memory = call (whatever);
1738
1739          and
1740
1741          call (memory).  */
1742     }
1743
1744   if (TREE_CODE (stmt) == PHI_NODE
1745       && bb_for_stmt (stmt) == data->current_loop->header)
1746     {
1747       lhs = PHI_RESULT (stmt);
1748       iv = get_iv (data, lhs);
1749
1750       if (iv && !integer_zerop (iv->step))
1751         return;
1752     }
1753
1754   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1755     {
1756       op = USE_FROM_PTR (use_p);
1757
1758       if (TREE_CODE (op) != SSA_NAME)
1759         continue;
1760
1761       iv = get_iv (data, op);
1762       if (!iv)
1763         continue;
1764
1765       find_interesting_uses_op (data, op);
1766     }
1767 }
1768
1769 /* Finds interesting uses of induction variables outside of loops
1770    on loop exit edge EXIT.  */
1771
1772 static void
1773 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1774 {
1775   tree phi, def;
1776
1777   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1778     {
1779       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1780       if (is_gimple_reg (def))
1781         find_interesting_uses_op (data, def);
1782     }
1783 }
1784
1785 /* Finds uses of the induction variables that are interesting.  */
1786
1787 static void
1788 find_interesting_uses (struct ivopts_data *data)
1789 {
1790   basic_block bb;
1791   block_stmt_iterator bsi;
1792   tree phi;
1793   basic_block *body = get_loop_body (data->current_loop);
1794   unsigned i;
1795   struct version_info *info;
1796   edge e;
1797
1798   if (dump_file && (dump_flags & TDF_DETAILS))
1799     fprintf (dump_file, "Uses:\n\n");
1800
1801   for (i = 0; i < data->current_loop->num_nodes; i++)
1802     {
1803       edge_iterator ei;
1804       bb = body[i];
1805
1806       FOR_EACH_EDGE (e, ei, bb->succs)
1807         if (e->dest != EXIT_BLOCK_PTR
1808             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1809           find_interesting_uses_outside (data, e);
1810
1811       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1812         find_interesting_uses_stmt (data, phi);
1813       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1814         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1815     }
1816
1817   if (dump_file && (dump_flags & TDF_DETAILS))
1818     {
1819       bitmap_iterator bi;
1820
1821       fprintf (dump_file, "\n");
1822
1823       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1824         {
1825           info = ver_info (data, i);
1826           if (info->inv_id)
1827             {
1828               fprintf (dump_file, "  ");
1829               print_generic_expr (dump_file, info->name, TDF_SLIM);
1830               fprintf (dump_file, " is invariant (%d)%s\n",
1831                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1832             }
1833         }
1834
1835       fprintf (dump_file, "\n");
1836     }
1837
1838   free (body);
1839 }
1840
1841 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1842    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1843    we are at the top-level of the processed address.  */
1844
1845 static tree
1846 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1847                 unsigned HOST_WIDE_INT *offset)
1848 {
1849   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1850   enum tree_code code;
1851   tree type, orig_type = TREE_TYPE (expr);
1852   unsigned HOST_WIDE_INT off0, off1, st;
1853   tree orig_expr = expr;
1854
1855   STRIP_NOPS (expr);
1856
1857   type = TREE_TYPE (expr);
1858   code = TREE_CODE (expr);
1859   *offset = 0;
1860
1861   switch (code)
1862     {
1863     case INTEGER_CST:
1864       if (!cst_and_fits_in_hwi (expr)
1865           || integer_zerop (expr))
1866         return orig_expr;
1867
1868       *offset = int_cst_value (expr);
1869       return build_int_cst (orig_type, 0);
1870
1871     case POINTER_PLUS_EXPR:
1872     case PLUS_EXPR:
1873     case MINUS_EXPR:
1874       op0 = TREE_OPERAND (expr, 0);
1875       op1 = TREE_OPERAND (expr, 1);
1876
1877       op0 = strip_offset_1 (op0, false, false, &off0);
1878       op1 = strip_offset_1 (op1, false, false, &off1);
1879
1880       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1881       if (op0 == TREE_OPERAND (expr, 0)
1882           && op1 == TREE_OPERAND (expr, 1))
1883         return orig_expr;
1884
1885       if (integer_zerop (op1))
1886         expr = op0;
1887       else if (integer_zerop (op0))
1888         {
1889           if (code == MINUS_EXPR)
1890             expr = fold_build1 (NEGATE_EXPR, type, op1);
1891           else
1892             expr = op1;
1893         }
1894       else
1895         expr = fold_build2 (code, type, op0, op1);
1896
1897       return fold_convert (orig_type, expr);
1898
1899     case ARRAY_REF:
1900       if (!inside_addr)
1901         return orig_expr;
1902
1903       step = array_ref_element_size (expr);
1904       if (!cst_and_fits_in_hwi (step))
1905         break;
1906
1907       st = int_cst_value (step);
1908       op1 = TREE_OPERAND (expr, 1);
1909       op1 = strip_offset_1 (op1, false, false, &off1);
1910       *offset = off1 * st;
1911
1912       if (top_compref
1913           && integer_zerop (op1))
1914         {
1915           /* Strip the component reference completely.  */
1916           op0 = TREE_OPERAND (expr, 0);
1917           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1918           *offset += off0;
1919           return op0;
1920         }
1921       break;
1922
1923     case COMPONENT_REF:
1924       if (!inside_addr)
1925         return orig_expr;
1926
1927       tmp = component_ref_field_offset (expr);
1928       if (top_compref
1929           && cst_and_fits_in_hwi (tmp))
1930         {
1931           /* Strip the component reference completely.  */
1932           op0 = TREE_OPERAND (expr, 0);
1933           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1934           *offset = off0 + int_cst_value (tmp);
1935           return op0;
1936         }
1937       break;
1938
1939     case ADDR_EXPR:
1940       op0 = TREE_OPERAND (expr, 0);
1941       op0 = strip_offset_1 (op0, true, true, &off0);
1942       *offset += off0;
1943
1944       if (op0 == TREE_OPERAND (expr, 0))
1945         return orig_expr;
1946
1947       expr = build_fold_addr_expr (op0);
1948       return fold_convert (orig_type, expr);
1949
1950     case INDIRECT_REF:
1951       inside_addr = false;
1952       break;
1953
1954     default:
1955       return orig_expr;
1956     }
1957
1958   /* Default handling of expressions for that we want to recurse into
1959      the first operand.  */
1960   op0 = TREE_OPERAND (expr, 0);
1961   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1962   *offset += off0;
1963
1964   if (op0 == TREE_OPERAND (expr, 0)
1965       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1966     return orig_expr;
1967
1968   expr = copy_node (expr);
1969   TREE_OPERAND (expr, 0) = op0;
1970   if (op1)
1971     TREE_OPERAND (expr, 1) = op1;
1972
1973   /* Inside address, we might strip the top level component references,
1974      thus changing type of the expression.  Handling of ADDR_EXPR
1975      will fix that.  */
1976   expr = fold_convert (orig_type, expr);
1977
1978   return expr;
1979 }
1980
1981 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1982
1983 static tree
1984 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1985 {
1986   return strip_offset_1 (expr, false, false, offset);
1987 }
1988
1989 /* Returns variant of TYPE that can be used as base for different uses.
1990    We return unsigned type with the same precision, which avoids problems
1991    with overflows.  */
1992
1993 static tree
1994 generic_type_for (tree type)
1995 {
1996   if (POINTER_TYPE_P (type))
1997     return unsigned_type_for (type);
1998
1999   if (TYPE_UNSIGNED (type))
2000     return type;
2001
2002   return unsigned_type_for (type);
2003 }
2004
2005 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2006    the bitmap to that we should store it.  */
2007
2008 static struct ivopts_data *fd_ivopts_data;
2009 static tree
2010 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2011 {
2012   bitmap *depends_on = (bitmap *) data;
2013   struct version_info *info;
2014
2015   if (TREE_CODE (*expr_p) != SSA_NAME)
2016     return NULL_TREE;
2017   info = name_info (fd_ivopts_data, *expr_p);
2018
2019   if (!info->inv_id || info->has_nonlin_use)
2020     return NULL_TREE;
2021
2022   if (!*depends_on)
2023     *depends_on = BITMAP_ALLOC (NULL);
2024   bitmap_set_bit (*depends_on, info->inv_id);
2025
2026   return NULL_TREE;
2027 }
2028
2029 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2030    position to POS.  If USE is not NULL, the candidate is set as related to
2031    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2032    replacement of the final value of the iv by a direct computation.  */
2033
2034 static struct iv_cand *
2035 add_candidate_1 (struct ivopts_data *data,
2036                  tree base, tree step, bool important, enum iv_position pos,
2037                  struct iv_use *use, tree incremented_at)
2038 {
2039   unsigned i;
2040   struct iv_cand *cand = NULL;
2041   tree type, orig_type;
2042
2043   if (base)
2044     {
2045       orig_type = TREE_TYPE (base);
2046       type = generic_type_for (orig_type);
2047       /* Don't convert the base to the generic type for pointers as the generic
2048          type is an integer type with the same size as the pointer type.  */
2049       if (type != orig_type && !POINTER_TYPE_P (orig_type))
2050         {
2051           base = fold_convert (type, base);
2052           step = fold_convert (type, step);
2053         }
2054     }
2055
2056   for (i = 0; i < n_iv_cands (data); i++)
2057     {
2058       cand = iv_cand (data, i);
2059
2060       if (cand->pos != pos)
2061         continue;
2062
2063       if (cand->incremented_at != incremented_at)
2064         continue;
2065
2066       if (!cand->iv)
2067         {
2068           if (!base && !step)
2069             break;
2070
2071           continue;
2072         }
2073
2074       if (!base && !step)
2075         continue;
2076
2077       if (operand_equal_p (base, cand->iv->base, 0)
2078           && operand_equal_p (step, cand->iv->step, 0))
2079         break;
2080     }
2081
2082   if (i == n_iv_cands (data))
2083     {
2084       cand = XCNEW (struct iv_cand);
2085       cand->id = i;
2086
2087       if (!base && !step)
2088         cand->iv = NULL;
2089       else
2090         cand->iv = alloc_iv (base, step);
2091
2092       cand->pos = pos;
2093       if (pos != IP_ORIGINAL && cand->iv)
2094         {
2095           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2096           cand->var_after = cand->var_before;
2097         }
2098       cand->important = important;
2099       cand->incremented_at = incremented_at;
2100       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2101
2102       if (step
2103           && TREE_CODE (step) != INTEGER_CST)
2104         {
2105           fd_ivopts_data = data;
2106           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2107         }
2108
2109       if (dump_file && (dump_flags & TDF_DETAILS))
2110         dump_cand (dump_file, cand);
2111     }
2112
2113   if (important && !cand->important)
2114     {
2115       cand->important = true;
2116       if (dump_file && (dump_flags & TDF_DETAILS))
2117         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2118     }
2119
2120   if (use)
2121     {
2122       bitmap_set_bit (use->related_cands, i);
2123       if (dump_file && (dump_flags & TDF_DETAILS))
2124         fprintf (dump_file, "Candidate %d is related to use %d\n",
2125                  cand->id, use->id);
2126     }
2127
2128   return cand;
2129 }
2130
2131 /* Returns true if incrementing the induction variable at the end of the LOOP
2132    is allowed.
2133
2134    The purpose is to avoid splitting latch edge with a biv increment, thus
2135    creating a jump, possibly confusing other optimization passes and leaving
2136    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2137    is not available (so we do not have a better alternative), or if the latch
2138    edge is already nonempty.  */
2139
2140 static bool
2141 allow_ip_end_pos_p (struct loop *loop)
2142 {
2143   if (!ip_normal_pos (loop))
2144     return true;
2145
2146   if (!empty_block_p (ip_end_pos (loop)))
2147     return true;
2148
2149   return false;
2150 }
2151
2152 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2153    position to POS.  If USE is not NULL, the candidate is set as related to
2154    it.  The candidate computation is scheduled on all available positions.  */
2155
2156 static void
2157 add_candidate (struct ivopts_data *data,
2158                tree base, tree step, bool important, struct iv_use *use)
2159 {
2160   if (ip_normal_pos (data->current_loop))
2161     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2162   if (ip_end_pos (data->current_loop)
2163       && allow_ip_end_pos_p (data->current_loop))
2164     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2165 }
2166
2167 /* Add a standard "0 + 1 * iteration" iv candidate for a
2168    type with SIZE bits.  */
2169
2170 static void
2171 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2172                                      unsigned int size)
2173 {
2174   tree type = lang_hooks.types.type_for_size (size, true);
2175   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2176                  true, NULL);
2177 }
2178
2179 /* Adds standard iv candidates.  */
2180
2181 static void
2182 add_standard_iv_candidates (struct ivopts_data *data)
2183 {
2184   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2185
2186   /* The same for a double-integer type if it is still fast enough.  */
2187   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2188     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2189 }
2190
2191
2192 /* Adds candidates bases on the old induction variable IV.  */
2193
2194 static void
2195 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2196 {
2197   tree phi, def;
2198   struct iv_cand *cand;
2199
2200   add_candidate (data, iv->base, iv->step, true, NULL);
2201
2202   /* The same, but with initial value zero.  */
2203   add_candidate (data,
2204                  build_int_cst (TREE_TYPE (iv->base), 0),
2205                  iv->step, true, NULL);
2206
2207   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2208   if (TREE_CODE (phi) == PHI_NODE)
2209     {
2210       /* Additionally record the possibility of leaving the original iv
2211          untouched.  */
2212       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2213       cand = add_candidate_1 (data,
2214                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2215                               SSA_NAME_DEF_STMT (def));
2216       cand->var_before = iv->ssa_name;
2217       cand->var_after = def;
2218     }
2219 }
2220
2221 /* Adds candidates based on the old induction variables.  */
2222
2223 static void
2224 add_old_ivs_candidates (struct ivopts_data *data)
2225 {
2226   unsigned i;
2227   struct iv *iv;
2228   bitmap_iterator bi;
2229
2230   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2231     {
2232       iv = ver_info (data, i)->iv;
2233       if (iv && iv->biv_p && !integer_zerop (iv->step))
2234         add_old_iv_candidates (data, iv);
2235     }
2236 }
2237
2238 /* Adds candidates based on the value of the induction variable IV and USE.  */
2239
2240 static void
2241 add_iv_value_candidates (struct ivopts_data *data,
2242                          struct iv *iv, struct iv_use *use)
2243 {
2244   unsigned HOST_WIDE_INT offset;
2245   tree base;
2246   tree basetype;
2247
2248   add_candidate (data, iv->base, iv->step, false, use);
2249
2250   /* The same, but with initial value zero.  Make such variable important,
2251      since it is generic enough so that possibly many uses may be based
2252      on it.  */
2253   basetype = TREE_TYPE (iv->base);
2254   if (POINTER_TYPE_P (basetype))
2255     basetype = sizetype;
2256   add_candidate (data, build_int_cst (basetype, 0),
2257                  iv->step, true, use);
2258
2259   /* Third, try removing the constant offset.  */
2260   base = strip_offset (iv->base, &offset);
2261   if (offset)
2262     add_candidate (data, base, iv->step, false, use);
2263 }
2264
2265 /* Adds candidates based on the uses.  */
2266
2267 static void
2268 add_derived_ivs_candidates (struct ivopts_data *data)
2269 {
2270   unsigned i;
2271
2272   for (i = 0; i < n_iv_uses (data); i++)
2273     {
2274       struct iv_use *use = iv_use (data, i);
2275
2276       if (!use)
2277         continue;
2278
2279       switch (use->type)
2280         {
2281         case USE_NONLINEAR_EXPR:
2282         case USE_COMPARE:
2283         case USE_ADDRESS:
2284           /* Just add the ivs based on the value of the iv used here.  */
2285           add_iv_value_candidates (data, use->iv, use);
2286           break;
2287
2288         default:
2289           gcc_unreachable ();
2290         }
2291     }
2292 }
2293
2294 /* Record important candidates and add them to related_cands bitmaps
2295    if needed.  */
2296
2297 static void
2298 record_important_candidates (struct ivopts_data *data)
2299 {
2300   unsigned i;
2301   struct iv_use *use;
2302
2303   for (i = 0; i < n_iv_cands (data); i++)
2304     {
2305       struct iv_cand *cand = iv_cand (data, i);
2306
2307       if (cand->important)
2308         bitmap_set_bit (data->important_candidates, i);
2309     }
2310
2311   data->consider_all_candidates = (n_iv_cands (data)
2312                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2313
2314   if (data->consider_all_candidates)
2315     {
2316       /* We will not need "related_cands" bitmaps in this case,
2317          so release them to decrease peak memory consumption.  */
2318       for (i = 0; i < n_iv_uses (data); i++)
2319         {
2320           use = iv_use (data, i);
2321           BITMAP_FREE (use->related_cands);
2322         }
2323     }
2324   else
2325     {
2326       /* Add important candidates to the related_cands bitmaps.  */
2327       for (i = 0; i < n_iv_uses (data); i++)
2328         bitmap_ior_into (iv_use (data, i)->related_cands,
2329                          data->important_candidates);
2330     }
2331 }
2332
2333 /* Finds the candidates for the induction variables.  */
2334
2335 static void
2336 find_iv_candidates (struct ivopts_data *data)
2337 {
2338   /* Add commonly used ivs.  */
2339   add_standard_iv_candidates (data);
2340
2341   /* Add old induction variables.  */
2342   add_old_ivs_candidates (data);
2343
2344   /* Add induction variables derived from uses.  */
2345   add_derived_ivs_candidates (data);
2346
2347   /* Record the important candidates.  */
2348   record_important_candidates (data);
2349 }
2350
2351 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2352    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2353    we allocate a simple list to every use.  */
2354
2355 static void
2356 alloc_use_cost_map (struct ivopts_data *data)
2357 {
2358   unsigned i, size, s, j;
2359
2360   for (i = 0; i < n_iv_uses (data); i++)
2361     {
2362       struct iv_use *use = iv_use (data, i);
2363       bitmap_iterator bi;
2364
2365       if (data->consider_all_candidates)
2366         size = n_iv_cands (data);
2367       else
2368         {
2369           s = 0;
2370           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2371             {
2372               s++;
2373             }
2374
2375           /* Round up to the power of two, so that moduling by it is fast.  */
2376           for (size = 1; size < s; size <<= 1)
2377             continue;
2378         }
2379
2380       use->n_map_members = size;
2381       use->cost_map = XCNEWVEC (struct cost_pair, size);
2382     }
2383 }
2384
2385 /* Returns description of computation cost of expression whose runtime
2386    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2387
2388 static comp_cost
2389 new_cost (unsigned runtime, unsigned complexity)
2390 {
2391   comp_cost cost;
2392
2393   cost.cost = runtime;
2394   cost.complexity = complexity;
2395
2396   return cost;
2397 }
2398
2399 /* Adds costs COST1 and COST2.  */
2400
2401 static comp_cost
2402 add_costs (comp_cost cost1, comp_cost cost2)
2403 {
2404   cost1.cost += cost2.cost;
2405   cost1.complexity += cost2.complexity;
2406
2407   return cost1;
2408 }
2409 /* Subtracts costs COST1 and COST2.  */
2410
2411 static comp_cost
2412 sub_costs (comp_cost cost1, comp_cost cost2)
2413 {
2414   cost1.cost -= cost2.cost;
2415   cost1.complexity -= cost2.complexity;
2416
2417   return cost1;
2418 }
2419
2420 /* Returns a negative number if COST1 < COST2, a positive number if
2421    COST1 > COST2, and 0 if COST1 = COST2.  */
2422
2423 static int
2424 compare_costs (comp_cost cost1, comp_cost cost2)
2425 {
2426   if (cost1.cost == cost2.cost)
2427     return cost1.complexity - cost2.complexity;
2428
2429   return cost1.cost - cost2.cost;
2430 }
2431
2432 /* Returns true if COST is infinite.  */
2433
2434 static bool
2435 infinite_cost_p (comp_cost cost)
2436 {
2437   return cost.cost == INFTY;
2438 }
2439
2440 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2441    on invariants DEPENDS_ON and that the value used in expressing it
2442    is VALUE.*/
2443
2444 static void
2445 set_use_iv_cost (struct ivopts_data *data,
2446                  struct iv_use *use, struct iv_cand *cand,
2447                  comp_cost cost, bitmap depends_on, tree value)
2448 {
2449   unsigned i, s;
2450
2451   if (infinite_cost_p (cost))
2452     {
2453       BITMAP_FREE (depends_on);
2454       return;
2455     }
2456
2457   if (data->consider_all_candidates)
2458     {
2459       use->cost_map[cand->id].cand = cand;
2460       use->cost_map[cand->id].cost = cost;
2461       use->cost_map[cand->id].depends_on = depends_on;
2462       use->cost_map[cand->id].value = value;
2463       return;
2464     }
2465
2466   /* n_map_members is a power of two, so this computes modulo.  */
2467   s = cand->id & (use->n_map_members - 1);
2468   for (i = s; i < use->n_map_members; i++)
2469     if (!use->cost_map[i].cand)
2470       goto found;
2471   for (i = 0; i < s; i++)
2472     if (!use->cost_map[i].cand)
2473       goto found;
2474
2475   gcc_unreachable ();
2476
2477 found:
2478   use->cost_map[i].cand = cand;
2479   use->cost_map[i].cost = cost;
2480   use->cost_map[i].depends_on = depends_on;
2481   use->cost_map[i].value = value;
2482 }
2483
2484 /* Gets cost of (USE, CANDIDATE) pair.  */
2485
2486 static struct cost_pair *
2487 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2488                  struct iv_cand *cand)
2489 {
2490   unsigned i, s;
2491   struct cost_pair *ret;
2492
2493   if (!cand)
2494     return NULL;
2495
2496   if (data->consider_all_candidates)
2497     {
2498       ret = use->cost_map + cand->id;
2499       if (!ret->cand)
2500         return NULL;
2501
2502       return ret;
2503     }
2504
2505   /* n_map_members is a power of two, so this computes modulo.  */
2506   s = cand->id & (use->n_map_members - 1);
2507   for (i = s; i < use->n_map_members; i++)
2508     if (use->cost_map[i].cand == cand)
2509       return use->cost_map + i;
2510
2511   for (i = 0; i < s; i++)
2512     if (use->cost_map[i].cand == cand)
2513       return use->cost_map + i;
2514
2515   return NULL;
2516 }
2517
2518 /* Returns estimate on cost of computing SEQ.  */
2519
2520 static unsigned
2521 seq_cost (rtx seq)
2522 {
2523   unsigned cost = 0;
2524   rtx set;
2525
2526   for (; seq; seq = NEXT_INSN (seq))
2527     {
2528       set = single_set (seq);
2529       if (set)
2530         cost += rtx_cost (set, SET);
2531       else
2532         cost++;
2533     }
2534
2535   return cost;
2536 }
2537
2538 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2539 static rtx
2540 produce_memory_decl_rtl (tree obj, int *regno)
2541 {
2542   rtx x;
2543
2544   gcc_assert (obj);
2545   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2546     {
2547       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2548       x = gen_rtx_SYMBOL_REF (Pmode, name);
2549       SET_SYMBOL_REF_DECL (x, obj);
2550       x = gen_rtx_MEM (DECL_MODE (obj), x);
2551       targetm.encode_section_info (obj, x, true);
2552     }
2553   else
2554     {
2555       x = gen_raw_REG (Pmode, (*regno)++);
2556       x = gen_rtx_MEM (DECL_MODE (obj), x);
2557     }
2558
2559   return x;
2560 }
2561
2562 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2563    walk_tree.  DATA contains the actual fake register number.  */
2564
2565 static tree
2566 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2567 {
2568   tree obj = NULL_TREE;
2569   rtx x = NULL_RTX;
2570   int *regno = (int *) data;
2571
2572   switch (TREE_CODE (*expr_p))
2573     {
2574     case ADDR_EXPR:
2575       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2576            handled_component_p (*expr_p);
2577            expr_p = &TREE_OPERAND (*expr_p, 0))
2578         continue;
2579       obj = *expr_p;
2580       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2581         x = produce_memory_decl_rtl (obj, regno);
2582       break;
2583
2584     case SSA_NAME:
2585       *ws = 0;
2586       obj = SSA_NAME_VAR (*expr_p);
2587       if (!DECL_RTL_SET_P (obj))
2588         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2589       break;
2590
2591     case VAR_DECL:
2592     case PARM_DECL:
2593     case RESULT_DECL:
2594       *ws = 0;
2595       obj = *expr_p;
2596
2597       if (DECL_RTL_SET_P (obj))
2598         break;
2599
2600       if (DECL_MODE (obj) == BLKmode)
2601         x = produce_memory_decl_rtl (obj, regno);
2602       else
2603         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2604
2605       break;
2606
2607     default:
2608       break;
2609     }
2610
2611   if (x)
2612     {
2613       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2614       SET_DECL_RTL (obj, x);
2615     }
2616
2617   return NULL_TREE;
2618 }
2619
2620 /* Determines cost of the computation of EXPR.  */
2621
2622 static unsigned
2623 computation_cost (tree expr)
2624 {
2625   rtx seq, rslt;
2626   tree type = TREE_TYPE (expr);
2627   unsigned cost;
2628   /* Avoid using hard regs in ways which may be unsupported.  */
2629   int regno = LAST_VIRTUAL_REGISTER + 1;
2630
2631   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2632   start_sequence ();
2633   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2634   seq = get_insns ();
2635   end_sequence ();
2636
2637   cost = seq_cost (seq);
2638   if (MEM_P (rslt))
2639     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2640
2641   return cost;
2642 }
2643
2644 /* Returns variable containing the value of candidate CAND at statement AT.  */
2645
2646 static tree
2647 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2648 {
2649   if (stmt_after_increment (loop, cand, stmt))
2650     return cand->var_after;
2651   else
2652     return cand->var_before;
2653 }
2654
2655 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2656    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2657
2658 int
2659 tree_int_cst_sign_bit (const_tree t)
2660 {
2661   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2662   unsigned HOST_WIDE_INT w;
2663
2664   if (bitno < HOST_BITS_PER_WIDE_INT)
2665     w = TREE_INT_CST_LOW (t);
2666   else
2667     {
2668       w = TREE_INT_CST_HIGH (t);
2669       bitno -= HOST_BITS_PER_WIDE_INT;
2670     }
2671
2672   return (w >> bitno) & 1;
2673 }
2674
2675 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2676    same precision that is at least as wide as the precision of TYPE, stores
2677    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2678    type of A and B.  */
2679
2680 static tree
2681 determine_common_wider_type (tree *a, tree *b)
2682 {
2683   tree wider_type = NULL;
2684   tree suba, subb;
2685   tree atype = TREE_TYPE (*a);
2686
2687   if ((TREE_CODE (*a) == NOP_EXPR
2688        || TREE_CODE (*a) == CONVERT_EXPR))
2689     {
2690       suba = TREE_OPERAND (*a, 0);
2691       wider_type = TREE_TYPE (suba);
2692       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2693         return atype;
2694     }
2695   else
2696     return atype;
2697
2698   if ((TREE_CODE (*b) == NOP_EXPR
2699        || TREE_CODE (*b) == CONVERT_EXPR))
2700     {
2701       subb = TREE_OPERAND (*b, 0);
2702       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2703         return atype;
2704     }
2705   else
2706     return atype;
2707
2708   *a = suba;
2709   *b = subb;
2710   return wider_type;
2711 }
2712
2713 /* Determines the expression by that USE is expressed from induction variable
2714    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2715    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2716
2717 static bool
2718 get_computation_aff (struct loop *loop,
2719                      struct iv_use *use, struct iv_cand *cand, tree at,
2720                      struct affine_tree_combination *aff)
2721 {
2722   tree ubase = use->iv->base;
2723   tree ustep = use->iv->step;
2724   tree cbase = cand->iv->base;
2725   tree cstep = cand->iv->step, cstep_common;
2726   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2727   tree common_type, var;
2728   tree uutype;
2729   aff_tree cbase_aff, var_aff;
2730   double_int rat;
2731
2732   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2733     {
2734       /* We do not have a precision to express the values of use.  */
2735       return false;
2736     }
2737
2738   var = var_at_stmt (loop, cand, at);
2739   uutype = unsigned_type_for (utype);
2740
2741   /* If the conversion is not noop, perform it.  */
2742   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2743     {
2744       cstep = fold_convert (uutype, cstep);
2745       cbase = fold_convert (uutype, cbase);
2746       var = fold_convert (uutype, var);
2747     }
2748
2749   if (!constant_multiple_of (ustep, cstep, &rat))
2750     return false;
2751
2752   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2753      type, we achieve better folding by computing their difference in this
2754      wider type, and cast the result to UUTYPE.  We do not need to worry about
2755      overflows, as all the arithmetics will in the end be performed in UUTYPE
2756      anyway.  */
2757   common_type = determine_common_wider_type (&ubase, &cbase);
2758
2759   /* use = ubase - ratio * cbase + ratio * var.  */
2760   tree_to_aff_combination (ubase, common_type, aff);
2761   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2762   tree_to_aff_combination (var, uutype, &var_aff);
2763
2764   /* We need to shift the value if we are after the increment.  */
2765   if (stmt_after_increment (loop, cand, at))
2766     {
2767       aff_tree cstep_aff;
2768
2769       if (common_type != uutype)
2770         cstep_common = fold_convert (common_type, cstep);
2771       else
2772         cstep_common = cstep;
2773
2774       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2775       aff_combination_add (&cbase_aff, &cstep_aff);
2776     }
2777
2778   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2779   aff_combination_add (aff, &cbase_aff);
2780   if (common_type != uutype)
2781     aff_combination_convert (aff, uutype);
2782
2783   aff_combination_scale (&var_aff, rat);
2784   aff_combination_add (aff, &var_aff);
2785
2786   return true;
2787 }
2788
2789 /* Determines the expression by that USE is expressed from induction variable
2790    CAND at statement AT in LOOP.  The computation is unshared.  */
2791
2792 static tree
2793 get_computation_at (struct loop *loop,
2794                     struct iv_use *use, struct iv_cand *cand, tree at)
2795 {
2796   aff_tree aff;
2797   tree type = TREE_TYPE (use->iv->base);
2798
2799   if (!get_computation_aff (loop, use, cand, at, &aff))
2800     return NULL_TREE;
2801   unshare_aff_combination (&aff);
2802   return fold_convert (type, aff_combination_to_tree (&aff));
2803 }
2804
2805 /* Determines the expression by that USE is expressed from induction variable
2806    CAND in LOOP.  The computation is unshared.  */
2807
2808 static tree
2809 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2810 {
2811   return get_computation_at (loop, use, cand, use->stmt);
2812 }
2813
2814 /* Returns cost of addition in MODE.  */
2815
2816 static unsigned
2817 add_cost (enum machine_mode mode)
2818 {
2819   static unsigned costs[NUM_MACHINE_MODES];
2820   rtx seq;
2821   unsigned cost;
2822
2823   if (costs[mode])
2824     return costs[mode];
2825
2826   start_sequence ();
2827   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2828                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2829                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2830                  NULL_RTX);
2831   seq = get_insns ();
2832   end_sequence ();
2833
2834   cost = seq_cost (seq);
2835   if (!cost)
2836     cost = 1;
2837
2838   costs[mode] = cost;
2839
2840   if (dump_file && (dump_flags & TDF_DETAILS))
2841     fprintf (dump_file, "Addition in %s costs %d\n",
2842              GET_MODE_NAME (mode), cost);
2843   return cost;
2844 }
2845
2846 /* Entry in a hashtable of already known costs for multiplication.  */
2847 struct mbc_entry
2848 {
2849   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2850   enum machine_mode mode;       /* In mode.  */
2851   unsigned cost;                /* The cost.  */
2852 };
2853
2854 /* Counts hash value for the ENTRY.  */
2855
2856 static hashval_t
2857 mbc_entry_hash (const void *entry)
2858 {
2859   const struct mbc_entry *e = (const struct mbc_entry *) entry;
2860
2861   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2862 }
2863
2864 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2865
2866 static int
2867 mbc_entry_eq (const void *entry1, const void *entry2)
2868 {
2869   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2870   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2871
2872   return (e1->mode == e2->mode
2873           && e1->cst == e2->cst);
2874 }
2875
2876 /* Returns cost of multiplication by constant CST in MODE.  */
2877
2878 unsigned
2879 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2880 {
2881   static htab_t costs;
2882   struct mbc_entry **cached, act;
2883   rtx seq;
2884   unsigned cost;
2885
2886   if (!costs)
2887     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2888
2889   act.mode = mode;
2890   act.cst = cst;
2891   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2892   if (*cached)
2893     return (*cached)->cost;
2894
2895   *cached = XNEW (struct mbc_entry);
2896   (*cached)->mode = mode;
2897   (*cached)->cst = cst;
2898
2899   start_sequence ();
2900   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2901                gen_int_mode (cst, mode), NULL_RTX, 0);
2902   seq = get_insns ();
2903   end_sequence ();
2904
2905   cost = seq_cost (seq);
2906
2907   if (dump_file && (dump_flags & TDF_DETAILS))
2908     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2909              (int) cst, GET_MODE_NAME (mode), cost);
2910
2911   (*cached)->cost = cost;
2912
2913   return cost;
2914 }
2915
2916 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2917    validity for a memory reference accessing memory of mode MODE.  */
2918
2919 bool
2920 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2921 {
2922 #define MAX_RATIO 128
2923   static sbitmap valid_mult[MAX_MACHINE_MODE];
2924
2925   if (!valid_mult[mode])
2926     {
2927       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2928       rtx addr;
2929       HOST_WIDE_INT i;
2930
2931       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2932       sbitmap_zero (valid_mult[mode]);
2933       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2934       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2935         {
2936           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2937           if (memory_address_p (mode, addr))
2938             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2939         }
2940
2941       if (dump_file && (dump_flags & TDF_DETAILS))
2942         {
2943           fprintf (dump_file, "  allowed multipliers:");
2944           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2945             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2946               fprintf (dump_file, " %d", (int) i);
2947           fprintf (dump_file, "\n");
2948           fprintf (dump_file, "\n");
2949         }
2950     }
2951
2952   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2953     return false;
2954
2955   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2956 }
2957
2958 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2959    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2960    variable is omitted.  Compute the cost for a memory reference that accesses
2961    a memory location of mode MEM_MODE.
2962
2963    TODO -- there must be some better way.  This all is quite crude.  */
2964
2965 static comp_cost
2966 get_address_cost (bool symbol_present, bool var_present,
2967                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2968                   enum machine_mode mem_mode)
2969 {
2970   static bool initialized[MAX_MACHINE_MODE];
2971   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2972   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2973   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2974   unsigned cost, acost, complexity;
2975   bool offset_p, ratio_p;
2976   HOST_WIDE_INT s_offset;
2977   unsigned HOST_WIDE_INT mask;
2978   unsigned bits;
2979
2980   if (!initialized[mem_mode])
2981     {
2982       HOST_WIDE_INT i;
2983       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2984       int old_cse_not_expected;
2985       unsigned sym_p, var_p, off_p, rat_p, add_c;
2986       rtx seq, addr, base;
2987       rtx reg0, reg1;
2988
2989       initialized[mem_mode] = true;
2990
2991       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2992
2993       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2994       for (i = start; i <= 1 << 20; i <<= 1)
2995         {
2996           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2997           if (!memory_address_p (mem_mode, addr))
2998             break;
2999         }
3000       max_offset[mem_mode] = i == start ? 0 : i >> 1;
3001       off[mem_mode] = max_offset[mem_mode];
3002
3003       for (i = start; i <= 1 << 20; i <<= 1)
3004         {
3005           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3006           if (!memory_address_p (mem_mode, addr))
3007             break;
3008         }
3009       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3010
3011       if (dump_file && (dump_flags & TDF_DETAILS))
3012         {
3013           fprintf (dump_file, "get_address_cost:\n");
3014           fprintf (dump_file, "  min offset %s %d\n",
3015                    GET_MODE_NAME (mem_mode),
3016                    (int) min_offset[mem_mode]);
3017           fprintf (dump_file, "  max offset %s %d\n",
3018                    GET_MODE_NAME (mem_mode),
3019                    (int) max_offset[mem_mode]);
3020         }
3021
3022       rat[mem_mode] = 1;
3023       for (i = 2; i <= MAX_RATIO; i++)
3024         if (multiplier_allowed_in_address_p (i, mem_mode))
3025           {
3026             rat[mem_mode] = i;
3027             break;
3028           }
3029
3030       /* Compute the cost of various addressing modes.  */
3031       acost = 0;
3032       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3033       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3034
3035       for (i = 0; i < 16; i++)
3036         {
3037           sym_p = i & 1;
3038           var_p = (i >> 1) & 1;
3039           off_p = (i >> 2) & 1;
3040           rat_p = (i >> 3) & 1;
3041
3042           addr = reg0;
3043           if (rat_p)
3044             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3045                                    gen_int_mode (rat[mem_mode], Pmode));
3046
3047           if (var_p)
3048             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3049
3050           if (sym_p)
3051             {
3052               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3053               /* ??? We can run into trouble with some backends by presenting
3054                  it with symbols which havn't been properly passed through
3055                  targetm.encode_section_info.  By setting the local bit, we
3056                  enhance the probability of things working.  */
3057               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3058
3059               if (off_p)
3060                 base = gen_rtx_fmt_e (CONST, Pmode,
3061                                       gen_rtx_fmt_ee (PLUS, Pmode,
3062                                                       base,
3063                                                       gen_int_mode (off[mem_mode],
3064                                                                     Pmode)));
3065             }
3066           else if (off_p)
3067             base = gen_int_mode (off[mem_mode], Pmode);
3068           else
3069             base = NULL_RTX;
3070
3071           if (base)
3072             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3073
3074           start_sequence ();
3075           /* To avoid splitting addressing modes, pretend that no cse will
3076              follow.  */
3077           old_cse_not_expected = cse_not_expected;
3078           cse_not_expected = true;
3079           addr = memory_address (mem_mode, addr);
3080           cse_not_expected = old_cse_not_expected;
3081           seq = get_insns ();
3082           end_sequence ();
3083
3084           acost = seq_cost (seq);
3085           acost += address_cost (addr, mem_mode);
3086
3087           if (!acost)
3088             acost = 1;
3089           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3090         }
3091
3092       /* On some targets, it is quite expensive to load symbol to a register,
3093          which makes addresses that contain symbols look much more expensive.
3094          However, the symbol will have to be loaded in any case before the
3095          loop (and quite likely we have it in register already), so it does not
3096          make much sense to penalize them too heavily.  So make some final
3097          tweaks for the SYMBOL_PRESENT modes:
3098
3099          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3100          var is cheaper, use this mode with small penalty.
3101          If VAR_PRESENT is true, try whether the mode with
3102          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3103          if this is the case, use it.  */
3104       add_c = add_cost (Pmode);
3105       for (i = 0; i < 8; i++)
3106         {
3107           var_p = i & 1;
3108           off_p = (i >> 1) & 1;
3109           rat_p = (i >> 2) & 1;
3110
3111           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3112           if (var_p)
3113             acost += add_c;
3114
3115           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3116             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3117         }
3118
3119       if (dump_file && (dump_flags & TDF_DETAILS))
3120         {
3121           fprintf (dump_file, "Address costs:\n");
3122
3123           for (i = 0; i < 16; i++)
3124             {
3125               sym_p = i & 1;
3126               var_p = (i >> 1) & 1;
3127               off_p = (i >> 2) & 1;
3128               rat_p = (i >> 3) & 1;
3129
3130               fprintf (dump_file, "  ");
3131               if (sym_p)
3132                 fprintf (dump_file, "sym + ");
3133               if (var_p)
3134                 fprintf (dump_file, "var + ");
3135               if (off_p)
3136                 fprintf (dump_file, "cst + ");
3137               if (rat_p)
3138                 fprintf (dump_file, "rat * ");
3139
3140               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3141               fprintf (dump_file, "index costs %d\n", acost);
3142             }
3143           fprintf (dump_file, "\n");
3144         }
3145     }
3146
3147   bits = GET_MODE_BITSIZE (Pmode);
3148   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3149   offset &= mask;
3150   if ((offset >> (bits - 1) & 1))
3151     offset |= ~mask;
3152   s_offset = offset;
3153
3154   cost = 0;
3155   offset_p = (s_offset != 0
3156               && min_offset[mem_mode] <= s_offset
3157               && s_offset <= max_offset[mem_mode]);
3158   ratio_p = (ratio != 1
3159              && multiplier_allowed_in_address_p (ratio, mem_mode));
3160
3161   if (ratio != 1 && !ratio_p)
3162     cost += multiply_by_cost (ratio, Pmode);
3163
3164   if (s_offset && !offset_p && !symbol_present)
3165     cost += add_cost (Pmode);
3166
3167   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3168   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3169   return new_cost (cost + acost, complexity);
3170 }
3171
3172 /* Estimates cost of forcing expression EXPR into a variable.  */
3173
3174 static comp_cost
3175 force_expr_to_var_cost (tree expr)
3176 {
3177   static bool costs_initialized = false;
3178   static unsigned integer_cost;
3179   static unsigned symbol_cost;
3180   static unsigned address_cost;
3181   tree op0, op1;
3182   comp_cost cost0, cost1, cost;
3183   enum machine_mode mode;
3184
3185   if (!costs_initialized)
3186     {
3187       tree type = build_pointer_type (integer_type_node);
3188       tree var, addr;
3189       rtx x;
3190
3191       var = create_tmp_var_raw (integer_type_node, "test_var");
3192       TREE_STATIC (var) = 1;
3193       x = produce_memory_decl_rtl (var, NULL);
3194       SET_DECL_RTL (var, x);
3195
3196       integer_cost = computation_cost (build_int_cst (integer_type_node,
3197                                                       2000));
3198
3199       addr = build1 (ADDR_EXPR, type, var);
3200       symbol_cost = computation_cost (addr) + 1;
3201
3202       address_cost
3203         = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3204                                     addr,
3205                                     build_int_cst (sizetype, 2000))) + 1;
3206       if (dump_file && (dump_flags & TDF_DETAILS))
3207         {
3208           fprintf (dump_file, "force_expr_to_var_cost:\n");
3209           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3210           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3211           fprintf (dump_file, "  address %d\n", (int) address_cost);
3212           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3213           fprintf (dump_file, "\n");
3214         }
3215
3216       costs_initialized = true;
3217     }
3218
3219   STRIP_NOPS (expr);
3220
3221   if (SSA_VAR_P (expr))
3222     return zero_cost;
3223
3224   if (is_gimple_min_invariant (expr))
3225     {
3226       if (TREE_CODE (expr) == INTEGER_CST)
3227         return new_cost (integer_cost, 0);
3228
3229       if (TREE_CODE (expr) == ADDR_EXPR)
3230         {
3231           tree obj = TREE_OPERAND (expr, 0);
3232
3233           if (TREE_CODE (obj) == VAR_DECL
3234               || TREE_CODE (obj) == PARM_DECL
3235               || TREE_CODE (obj) == RESULT_DECL)
3236             return new_cost (symbol_cost, 0);
3237         }
3238
3239       return new_cost (address_cost, 0);
3240     }
3241
3242   switch (TREE_CODE (expr))
3243     {
3244     case POINTER_PLUS_EXPR:
3245     case PLUS_EXPR:
3246     case MINUS_EXPR:
3247     case MULT_EXPR:
3248       op0 = TREE_OPERAND (expr, 0);
3249       op1 = TREE_OPERAND (expr, 1);
3250       STRIP_NOPS (op0);
3251       STRIP_NOPS (op1);
3252
3253       if (is_gimple_val (op0))
3254         cost0 = zero_cost;
3255       else
3256         cost0 = force_expr_to_var_cost (op0);
3257
3258       if (is_gimple_val (op1))
3259         cost1 = zero_cost;
3260       else
3261         cost1 = force_expr_to_var_cost (op1);
3262
3263       break;
3264
3265     default:
3266       /* Just an arbitrary value, FIXME.  */
3267       return new_cost (target_spill_cost, 0);
3268     }
3269
3270   mode = TYPE_MODE (TREE_TYPE (expr));
3271   switch (TREE_CODE (expr))
3272     {
3273     case POINTER_PLUS_EXPR:
3274     case PLUS_EXPR:
3275     case MINUS_EXPR:
3276       cost = new_cost (add_cost (mode), 0);
3277       break;
3278
3279     case MULT_EXPR:
3280       if (cst_and_fits_in_hwi (op0))
3281         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3282       else if (cst_and_fits_in_hwi (op1))
3283         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3284       else
3285         return new_cost (target_spill_cost, 0);
3286       break;
3287
3288     default:
3289       gcc_unreachable ();
3290     }
3291
3292   cost = add_costs (cost, cost0);
3293   cost = add_costs (cost, cost1);
3294
3295   /* Bound the cost by target_spill_cost.  The parts of complicated
3296      computations often are either loop invariant or at least can
3297      be shared between several iv uses, so letting this grow without
3298      limits would not give reasonable results.  */
3299   if (cost.cost > target_spill_cost)
3300     cost.cost = target_spill_cost;
3301
3302   return cost;
3303 }
3304
3305 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3306    invariants the computation depends on.  */
3307
3308 static comp_cost
3309 force_var_cost (struct ivopts_data *data,
3310                 tree expr, bitmap *depends_on)
3311 {
3312   if (depends_on)
3313     {
3314       fd_ivopts_data = data;
3315       walk_tree (&expr, find_depends, depends_on, NULL);
3316     }
3317
3318   return force_expr_to_var_cost (expr);
3319 }
3320
3321 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3322    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3323    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3324    invariants the computation depends on.  */
3325
3326 static comp_cost
3327 split_address_cost (struct ivopts_data *data,
3328                     tree addr, bool *symbol_present, bool *var_present,
3329                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3330 {
3331   tree core;
3332   HOST_WIDE_INT bitsize;
3333   HOST_WIDE_INT bitpos;
3334   tree toffset;
3335   enum machine_mode mode;
3336   int unsignedp, volatilep;
3337
3338   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3339                               &unsignedp, &volatilep, false);
3340
3341   if (toffset != 0
3342       || bitpos % BITS_PER_UNIT != 0
3343       || TREE_CODE (core) != VAR_DECL)
3344     {
3345       *symbol_present = false;
3346       *var_present = true;
3347       fd_ivopts_data = data;
3348       walk_tree (&addr, find_depends, depends_on, NULL);
3349       return new_cost (target_spill_cost, 0);
3350     }
3351
3352   *offset += bitpos / BITS_PER_UNIT;
3353   if (TREE_STATIC (core)
3354       || DECL_EXTERNAL (core))
3355     {
3356       *symbol_present = true;
3357       *var_present = false;
3358       return zero_cost;
3359     }
3360
3361   *symbol_present = false;
3362   *var_present = true;
3363   return zero_cost;
3364 }
3365
3366 /* Estimates cost of expressing difference of addresses E1 - E2 as
3367    var + symbol + offset.  The value of offset is added to OFFSET,
3368    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3369    part is missing.  DEPENDS_ON is a set of the invariants the computation
3370    depends on.  */
3371
3372 static comp_cost
3373 ptr_difference_cost (struct ivopts_data *data,
3374                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3375                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3376 {
3377   HOST_WIDE_INT diff = 0;
3378   comp_cost cost;
3379
3380   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3381
3382   if (ptr_difference_const (e1, e2, &diff))
3383     {
3384       *offset += diff;
3385       *symbol_present = false;
3386       *var_present = false;
3387       return zero_cost;
3388     }
3389
3390   if (integer_zerop (e2))
3391     return split_address_cost (data, TREE_OPERAND (e1, 0),
3392                                symbol_present, var_present, offset, depends_on);
3393
3394   *symbol_present = false;
3395   *var_present = true;
3396
3397   cost = force_var_cost (data, e1, depends_on);
3398   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3399   cost.cost += add_cost (Pmode);
3400
3401   return cost;
3402 }
3403
3404 /* Estimates cost of expressing difference E1 - E2 as
3405    var + symbol + offset.  The value of offset is added to OFFSET,
3406    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3407    part is missing.  DEPENDS_ON is a set of the invariants the computation
3408    depends on.  */
3409
3410 static comp_cost
3411 difference_cost (struct ivopts_data *data,
3412                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3413                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3414 {
3415   comp_cost cost;
3416   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3417   unsigned HOST_WIDE_INT off1, off2;
3418
3419   e1 = strip_offset (e1, &off1);
3420   e2 = strip_offset (e2, &off2);
3421   *offset += off1 - off2;
3422
3423   STRIP_NOPS (e1);
3424   STRIP_NOPS (e2);
3425
3426   if (TREE_CODE (e1) == ADDR_EXPR)
3427     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3428                                 depends_on);
3429   *symbol_present = false;
3430
3431   if (operand_equal_p (e1, e2, 0))
3432     {
3433       *var_present = false;
3434       return zero_cost;
3435     }
3436   *var_present = true;
3437   if (integer_zerop (e2))
3438     return force_var_cost (data, e1, depends_on);
3439
3440   if (integer_zerop (e1))
3441     {
3442       cost = force_var_cost (data, e2, depends_on);
3443       cost.cost += multiply_by_cost (-1, mode);
3444
3445       return cost;
3446     }
3447
3448   cost = force_var_cost (data, e1, depends_on);
3449   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3450   cost.cost += add_cost (mode);
3451
3452   return cost;
3453 }
3454
3455 /* Determines the cost of the computation by that USE is expressed
3456    from induction variable CAND.  If ADDRESS_P is true, we just need
3457    to create an address from it, otherwise we want to get it into
3458    register.  A set of invariants we depend on is stored in
3459    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3460
3461 static comp_cost
3462 get_computation_cost_at (struct ivopts_data *data,
3463                          struct iv_use *use, struct iv_cand *cand,
3464                          bool address_p, bitmap *depends_on, tree at)
3465 {
3466   tree ubase = use->iv->base, ustep = use->iv->step;
3467   tree cbase, cstep;
3468   tree utype = TREE_TYPE (ubase), ctype;
3469   unsigned HOST_WIDE_INT cstepi, offset = 0;
3470   HOST_WIDE_INT ratio, aratio;
3471   bool var_present, symbol_present;
3472   comp_cost cost;
3473   unsigned n_sums;
3474   double_int rat;
3475
3476   *depends_on = NULL;
3477
3478   /* Only consider real candidates.  */
3479   if (!cand->iv)
3480     return infinite_cost;
3481
3482   cbase = cand->iv->base;
3483   cstep = cand->iv->step;
3484   ctype = TREE_TYPE (cbase);
3485
3486   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3487     {
3488       /* We do not have a precision to express the values of use.  */
3489       return infinite_cost;
3490     }
3491
3492   if (address_p)
3493     {
3494       /* Do not try to express address of an object with computation based
3495          on address of a different object.  This may cause problems in rtl
3496          level alias analysis (that does not expect this to be happening,
3497          as this is illegal in C), and would be unlikely to be useful
3498          anyway.  */
3499       if (use->iv->base_object
3500           && cand->iv->base_object
3501           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3502         return infinite_cost;
3503     }
3504
3505   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3506     {
3507       /* TODO -- add direct handling of this case.  */
3508       goto fallback;
3509     }
3510
3511   /* CSTEPI is removed from the offset in case statement is after the
3512      increment.  If the step is not constant, we use zero instead.
3513      This is a bit imprecise (there is the extra addition), but
3514      redundancy elimination is likely to transform the code so that
3515      it uses value of the variable before increment anyway,
3516      so it is not that much unrealistic.  */
3517   if (cst_and_fits_in_hwi (cstep))
3518     cstepi = int_cst_value (cstep);
3519   else
3520     cstepi = 0;
3521
3522   if (!constant_multiple_of (ustep, cstep, &rat))
3523     return infinite_cost;
3524
3525   if (double_int_fits_in_shwi_p (rat))
3526     ratio = double_int_to_shwi (rat);
3527   else
3528     return infinite_cost;
3529
3530   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3531      or ratio == 1, it is better to handle this like
3532
3533      ubase - ratio * cbase + ratio * var
3534
3535      (also holds in the case ratio == -1, TODO.  */
3536
3537   if (cst_and_fits_in_hwi (cbase))
3538     {
3539       offset = - ratio * int_cst_value (cbase);
3540       cost = difference_cost (data,
3541                               ubase, build_int_cst (utype, 0),
3542                               &symbol_present, &var_present, &offset,
3543                               depends_on);
3544     }
3545   else if (ratio == 1)
3546     {
3547       cost = difference_cost (data,
3548                               ubase, cbase,
3549                               &symbol_present, &var_present, &offset,
3550                               depends_on);
3551     }
3552   else
3553     {
3554       cost = force_var_cost (data, cbase, depends_on);
3555       cost.cost += add_cost (TYPE_MODE (ctype));
3556       cost = add_costs (cost,
3557                         difference_cost (data,
3558                                          ubase, build_int_cst (utype, 0),
3559                                          &symbol_present, &var_present,
3560                                          &offset, depends_on));
3561     }
3562
3563   /* If we are after the increment, the value of the candidate is higher by
3564      one iteration.  */
3565   if (stmt_after_increment (data->current_loop, cand, at))
3566     offset -= ratio * cstepi;
3567
3568   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3569      (symbol/var/const parts may be omitted).  If we are looking for an address,
3570      find the cost of addressing this.  */
3571   if (address_p)
3572     return add_costs (cost, get_address_cost (symbol_present, var_present,
3573                                 offset, ratio,
3574                                 TYPE_MODE (TREE_TYPE (*use->op_p))));
3575
3576   /* Otherwise estimate the costs for computing the expression.  */
3577   aratio = ratio > 0 ? ratio : -ratio;
3578   if (!symbol_present && !var_present && !offset)
3579     {
3580       if (ratio != 1)
3581         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3582
3583       return cost;
3584     }
3585
3586   if (aratio != 1)
3587     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3588
3589   n_sums = 1;
3590   if (var_present
3591       /* Symbol + offset should be compile-time computable.  */
3592       && (symbol_present || offset))
3593     n_sums++;
3594
3595   /* Having offset does not affect runtime cost in case it is added to
3596      symbol, but it increases complexity.  */
3597   if (offset)
3598     cost.complexity++;
3599
3600   cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3601   return cost;
3602
3603 fallback:
3604   {
3605     /* Just get the expression, expand it and measure the cost.  */
3606     tree comp = get_computation_at (data->current_loop, use, cand, at);
3607
3608     if (!comp)
3609       return infinite_cost;
3610
3611     if (address_p)
3612       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3613
3614     return new_cost (computation_cost (comp), 0);
3615   }
3616 }
3617
3618 /* Determines the cost of the computation by that USE is expressed
3619    from induction variable CAND.  If ADDRESS_P is true, we just need
3620    to create an address from it, otherwise we want to get it into
3621    register.  A set of invariants we depend on is stored in
3622    DEPENDS_ON.  */
3623
3624 static comp_cost
3625 get_computation_cost (struct ivopts_data *data,
3626                       struct iv_use *use, struct iv_cand *cand,
3627                       bool address_p, bitmap *depends_on)
3628 {
3629   return get_computation_cost_at (data,
3630                                   use, cand, address_p, depends_on, use->stmt);
3631 }
3632
3633 /* Determines cost of basing replacement of USE on CAND in a generic
3634    expression.  */
3635
3636 static bool
3637 determine_use_iv_cost_generic (struct ivopts_data *data,
3638                                struct iv_use *use, struct iv_cand *cand)
3639 {
3640   bitmap depends_on;
3641   comp_cost cost;
3642
3643   /* The simple case first -- if we need to express value of the preserved
3644      original biv, the cost is 0.  This also prevents us from counting the
3645      cost of increment twice -- once at this use and once in the cost of
3646      the candidate.  */
3647   if (cand->pos == IP_ORIGINAL
3648       && cand->incremented_at == use->stmt)
3649     {
3650       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3651       return true;
3652     }
3653
3654   cost = get_computation_cost (data, use, cand, false, &depends_on);
3655   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3656
3657   return !infinite_cost_p (cost);
3658 }
3659
3660 /* Determines cost of basing replacement of USE on CAND in an address.  */
3661
3662 static bool
3663 determine_use_iv_cost_address (struct ivopts_data *data,
3664                                struct iv_use *use, struct iv_cand *cand)
3665 {
3666   bitmap depends_on;
3667   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3668
3669   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3670
3671   return !infinite_cost_p (cost);
3672 }
3673
3674 /* Computes value of candidate CAND at position AT in iteration NITER, and
3675    stores it to VAL.  */
3676
3677 static void
3678 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3679                aff_tree *val)
3680 {
3681   aff_tree step, delta, nit;
3682   struct iv *iv = cand->iv;
3683   tree type = TREE_TYPE (iv->base);
3684   tree steptype = type;
3685   if (POINTER_TYPE_P (type))
3686     steptype = sizetype;
3687
3688   tree_to_aff_combination (iv->step, steptype, &step);
3689   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3690   aff_combination_convert (&nit, steptype);
3691   aff_combination_mult (&nit, &step, &delta);
3692   if (stmt_after_increment (loop, cand, at))
3693     aff_combination_add (&delta, &step);
3694
3695   tree_to_aff_combination (iv->base, type, val);
3696   aff_combination_add (val, &delta);
3697 }
3698
3699 /* Returns period of induction variable iv.  */
3700
3701 static tree
3702 iv_period (struct iv *iv)
3703 {
3704   tree step = iv->step, period, type;
3705   tree pow2div;
3706
3707   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3708
3709   /* Period of the iv is gcd (step, type range).  Since type range is power
3710      of two, it suffices to determine the maximum power of two that divides
3711      step.  */
3712   pow2div = num_ending_zeros (step);
3713   type = unsigned_type_for (TREE_TYPE (step));
3714
3715   period = build_low_bits_mask (type,
3716                                 (TYPE_PRECISION (type)
3717                                  - tree_low_cst (pow2div, 1)));
3718
3719   return period;
3720 }
3721
3722 /* Returns the comparison operator used when eliminating the iv USE.  */
3723
3724 static enum tree_code
3725 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3726 {
3727   struct loop *loop = data->current_loop;
3728   basic_block ex_bb;
3729   edge exit;
3730
3731   ex_bb = bb_for_stmt (use->stmt);
3732   exit = EDGE_SUCC (ex_bb, 0);
3733   if (flow_bb_inside_loop_p (loop, exit->dest))
3734     exit = EDGE_SUCC (ex_bb, 1);
3735
3736   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3737 }
3738
3739 /* Check whether it is possible to express the condition in USE by comparison
3740    of candidate CAND.  If so, store the value compared with to BOUND.  */
3741
3742 static bool
3743 may_eliminate_iv (struct ivopts_data *data,
3744                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3745 {
3746   basic_block ex_bb;
3747   edge exit;
3748   tree nit, period;
3749   struct loop *loop = data->current_loop;
3750   aff_tree bnd;
3751   double_int period_value, max_niter;
3752
3753   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3754     return false;
3755
3756   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3757      for other conditions inside loop body.  */
3758   ex_bb = bb_for_stmt (use->stmt);
3759   if (use->stmt != last_stmt (ex_bb)
3760       || TREE_CODE (use->stmt) != COND_EXPR)
3761     return false;
3762   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3763     return false;
3764
3765   exit = EDGE_SUCC (ex_bb, 0);
3766   if (flow_bb_inside_loop_p (loop, exit->dest))
3767     exit = EDGE_SUCC (ex_bb, 1);
3768   if (flow_bb_inside_loop_p (loop, exit->dest))
3769     return false;
3770
3771   nit = niter_for_exit (data, exit);
3772   if (!nit)
3773     return false;
3774
3775   /* Determine whether we may use the variable to test whether niter iterations
3776      elapsed.  This is the case iff the period of the induction variable is
3777      greater than the number of iterations.  */
3778   period = iv_period (cand->iv);
3779   if (!period)
3780     return false;
3781
3782   /* Compare the period with the estimate on the number of iterations of the
3783      loop.  */
3784   if (!estimated_loop_iterations (loop, true, &max_niter))
3785     return false;
3786   period_value = tree_to_double_int (period);
3787   if (double_int_ucmp (period_value, max_niter) <= 0)
3788     return false;
3789
3790   cand_value_at (loop, cand, use->stmt, nit, &bnd);
3791   *bound = aff_combination_to_tree (&bnd);
3792   return true;
3793 }
3794
3795 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3796
3797 static bool
3798 determine_use_iv_cost_condition (struct ivopts_data *data,
3799                                  struct iv_use *use, struct iv_cand *cand)
3800 {
3801   tree bound = NULL_TREE;
3802   struct iv *cmp_iv;
3803   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3804   comp_cost elim_cost, express_cost, cost;
3805   bool ok;
3806
3807   /* Only consider real candidates.  */
3808   if (!cand->iv)
3809     {
3810       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3811       return false;
3812     }
3813
3814   /* Try iv elimination.  */
3815   if (may_eliminate_iv (data, use, cand, &bound))
3816     {
3817       elim_cost = force_var_cost (data, bound, &depends_on_elim);
3818       /* The bound is a loop invariant, so it will be only computed
3819          once.  */
3820       elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3821     }
3822   else
3823     elim_cost = infinite_cost;
3824
3825   /* Try expressing the original giv.  If it is compared with an invariant,
3826      note that we cannot get rid of it.  */
3827   ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3828   gcc_assert (ok);
3829
3830   express_cost = get_computation_cost (data, use, cand, false,
3831                                        &depends_on_express);
3832   fd_ivopts_data = data;
3833   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3834
3835   /* Choose the better approach.  */
3836   if (compare_costs (elim_cost, express_cost) < 0)
3837     {
3838       cost = elim_cost;
3839       depends_on = depends_on_elim;
3840       depends_on_elim = NULL;
3841     }
3842   else
3843     {
3844       cost = express_cost;
3845       depends_on = depends_on_express;
3846       depends_on_express = NULL;
3847       bound = NULL_TREE;
3848     }
3849
3850   set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3851
3852   if (depends_on_elim)
3853     BITMAP_FREE (depends_on_elim);
3854   if (depends_on_express)
3855     BITMAP_FREE (depends_on_express);
3856
3857   return !infinite_cost_p (cost);
3858 }
3859
3860 /* Determines cost of basing replacement of USE on CAND.  Returns false
3861    if USE cannot be based on CAND.  */
3862
3863 static bool
3864 determine_use_iv_cost (struct ivopts_data *data,
3865                        struct iv_use *use, struct iv_cand *cand)
3866 {
3867   switch (use->type)
3868     {
3869     case USE_NONLINEAR_EXPR:
3870       return determine_use_iv_cost_generic (data, use, cand);
3871
3872     case USE_ADDRESS:
3873       return determine_use_iv_cost_address (data, use, cand);
3874
3875     case USE_COMPARE:
3876       return determine_use_iv_cost_condition (data, use, cand);
3877
3878     default:
3879       gcc_unreachable ();
3880     }
3881 }
3882
3883 /* Determines costs of basing the use of the iv on an iv candidate.  */
3884
3885 static void
3886 determine_use_iv_costs (struct ivopts_data *data)
3887 {
3888   unsigned i, j;
3889   struct iv_use *use;
3890   struct iv_cand *cand;
3891   bitmap to_clear = BITMAP_ALLOC (NULL);
3892
3893   alloc_use_cost_map (data);
3894
3895   for (i = 0; i < n_iv_uses (data); i++)
3896     {
3897       use = iv_use (data, i);
3898
3899       if (data->consider_all_candidates)
3900         {
3901           for (j = 0; j < n_iv_cands (data); j++)
3902             {
3903               cand = iv_cand (data, j);
3904               determine_use_iv_cost (data, use, cand);
3905             }
3906         }
3907       else
3908         {
3909           bitmap_iterator bi;
3910
3911           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3912             {
3913               cand = iv_cand (data, j);
3914               if (!determine_use_iv_cost (data, use, cand))
3915                 bitmap_set_bit (to_clear, j);
3916             }
3917
3918           /* Remove the candidates for that the cost is infinite from
3919              the list of related candidates.  */
3920           bitmap_and_compl_into (use->related_cands, to_clear);
3921           bitmap_clear (to_clear);
3922         }
3923     }
3924
3925   BITMAP_FREE (to_clear);
3926
3927   if (dump_file && (dump_flags & TDF_DETAILS))
3928     {
3929       fprintf (dump_file, "Use-candidate costs:\n");
3930
3931       for (i = 0; i < n_iv_uses (data); i++)
3932         {
3933           use = iv_use (data, i);
3934
3935           fprintf (dump_file, "Use %d:\n", i);
3936           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
3937           for (j = 0; j < use->n_map_members; j++)
3938             {
3939               if (!use->cost_map[j].cand
3940                   || infinite_cost_p (use->cost_map[j].cost))
3941                 continue;
3942
3943               fprintf (dump_file, "  %d\t%d\t%d\t",
3944                        use->cost_map[j].cand->id,
3945                        use->cost_map[j].cost.cost,
3946                        use->cost_map[j].cost.complexity);
3947               if (use->cost_map[j].depends_on)
3948                 bitmap_print (dump_file,
3949                               use->cost_map[j].depends_on, "","");
3950               fprintf (dump_file, "\n");
3951             }
3952
3953           fprintf (dump_file, "\n");
3954         }
3955       fprintf (dump_file, "\n");
3956     }
3957 }
3958
3959 /* Determines cost of the candidate CAND.  */
3960
3961 static void
3962 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3963 {
3964   comp_cost cost_base;
3965   unsigned cost, cost_step;
3966   tree base;
3967
3968   if (!cand->iv)
3969     {
3970       cand->cost = 0;
3971       return;
3972     }
3973
3974   /* There are two costs associated with the candidate -- its increment
3975      and its initialization.  The second is almost negligible for any loop
3976      that rolls enough, so we take it just very little into account.  */
3977
3978   base = cand->iv->base;
3979   cost_base = force_var_cost (data, base, NULL);
3980   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3981
3982   cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
3983
3984   /* Prefer the original ivs unless we may gain something by replacing it.
3985      The reason is to makee debugging simpler; so this is not relevant for
3986      artificial ivs created by other optimization passes.  */
3987   if (cand->pos != IP_ORIGINAL
3988       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3989     cost++;
3990
3991   /* Prefer not to insert statements into latch unless there are some
3992      already (so that we do not create unnecessary jumps).  */
3993   if (cand->pos == IP_END
3994       && empty_block_p (ip_end_pos (data->current_loop)))
3995     cost++;
3996
3997   cand->cost = cost;
3998 }
3999
4000 /* Determines costs of computation of the candidates.  */
4001
4002 static void
4003 determine_iv_costs (struct ivopts_data *data)
4004 {
4005   unsigned i;
4006
4007   if (dump_file && (dump_flags & TDF_DETAILS))
4008     {
4009       fprintf (dump_file, "Candidate costs:\n");
4010       fprintf (dump_file, "  cand\tcost\n");
4011     }
4012
4013   for (i = 0; i < n_iv_cands (data); i++)
4014     {
4015       struct iv_cand *cand = iv_cand (data, i);
4016
4017       determine_iv_cost (data, cand);
4018
4019       if (dump_file && (dump_flags & TDF_DETAILS))
4020         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4021     }
4022
4023   if (dump_file && (dump_flags & TDF_DETAILS))
4024     fprintf (dump_file, "\n");
4025 }
4026
4027 /* Calculates cost for having SIZE induction variables.  */
4028
4029 static unsigned
4030 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4031 {
4032   /* We add size to the cost, so that we prefer eliminating ivs
4033      if possible.  */
4034   return size + estimate_reg_pressure_cost (size, data->regs_used);
4035 }
4036
4037 /* For each size of the induction variable set determine the penalty.  */
4038
4039 static void
4040 determine_set_costs (struct ivopts_data *data)
4041 {
4042   unsigned j, n;
4043   tree phi, op;
4044   struct loop *loop = data->current_loop;
4045   bitmap_iterator bi;
4046
4047   /* We use the following model (definitely improvable, especially the
4048      cost function -- TODO):
4049
4050      We estimate the number of registers available (using MD data), name it A.
4051
4052      We estimate the number of registers used by the loop, name it U.  This
4053      number is obtained as the number of loop phi nodes (not counting virtual
4054      registers and bivs) + the number of variables from outside of the loop.
4055
4056      We set a reserve R (free regs that are used for temporary computations,
4057      etc.).  For now the reserve is a constant 3.
4058
4059      Let I be the number of induction variables.
4060
4061      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4062         make a lot of ivs without a reason).
4063      -- if A - R < U + I <= A, the cost is I * PRES_COST
4064      -- if U + I > A, the cost is I * PRES_COST and
4065         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4066
4067   if (dump_file && (dump_flags & TDF_DETAILS))
4068     {
4069       fprintf (dump_file, "Global costs:\n");
4070       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4071       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost);
4072       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4073     }
4074
4075   n = 0;
4076   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4077     {
4078       op = PHI_RESULT (phi);
4079
4080       if (!is_gimple_reg (op))
4081         continue;
4082
4083       if (get_iv (data, op))
4084         continue;
4085
4086       n++;
4087     }
4088
4089   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4090     {
4091       struct version_info *info = ver_info (data, j);
4092
4093       if (info->inv_id && info->has_nonlin_use)
4094         n++;
4095     }
4096
4097   data->regs_used = n;
4098   if (dump_file && (dump_flags & TDF_DETAILS))
4099     fprintf (dump_file, "  regs_used %d\n", n);
4100
4101   if (dump_file && (dump_flags & TDF_DETAILS))
4102     {
4103       fprintf (dump_file, "  cost for size:\n");
4104       fprintf (dump_file, "  ivs\tcost\n");
4105       for (j = 0; j <= 2 * target_avail_regs; j++)
4106         fprintf (dump_file, "  %d\t%d\n", j,
4107                  ivopts_global_cost_for_size (data, j));
4108       fprintf (dump_file, "\n");
4109     }
4110 }
4111
4112 /* Returns true if A is a cheaper cost pair than B.  */
4113
4114 static bool
4115 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4116 {
4117   int cmp;
4118
4119   if (!a)
4120     return false;
4121
4122   if (!b)
4123     return true;
4124
4125   cmp = compare_costs (a->cost, b->cost);
4126   if (cmp < 0)
4127     return true;
4128
4129   if (cmp > 0)
4130     return false;
4131
4132   /* In case the costs are the same, prefer the cheaper candidate.  */
4133   if (a->cand->cost < b->cand->cost)
4134     return true;
4135
4136   return false;
4137 }
4138
4139 /* Computes the cost field of IVS structure.  */
4140
4141 static void
4142 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4143 {
4144   comp_cost cost = ivs->cand_use_cost;
4145   cost.cost += ivs->cand_cost;
4146   cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4147
4148   ivs->cost = cost;
4149 }
4150
4151 /* Remove invariants in set INVS to set IVS.  */
4152
4153 static void
4154 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4155 {
4156   bitmap_iterator bi;
4157   unsigned iid;
4158
4159   if (!invs)
4160     return;
4161
4162   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4163     {
4164       ivs->n_invariant_uses[iid]--;
4165       if (ivs->n_invariant_uses[iid] == 0)
4166         ivs->n_regs--;
4167     }
4168 }
4169
4170 /* Set USE not to be expressed by any candidate in IVS.  */
4171
4172 static void
4173 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4174                  struct iv_use *use)
4175 {
4176   unsigned uid = use->id, cid;
4177   struct cost_pair *cp;
4178
4179   cp = ivs->cand_for_use[uid];
4180   if (!cp)
4181     return;
4182   cid = cp->cand->id;
4183
4184   ivs->bad_uses++;
4185   ivs->cand_for_use[uid] = NULL;
4186   ivs->n_cand_uses[cid]--;
4187
4188   if (ivs->n_cand_uses[cid] == 0)
4189     {
4190       bitmap_clear_bit (ivs->cands, cid);
4191       /* Do not count the pseudocandidates.  */
4192       if (cp->cand->iv)
4193         ivs->n_regs--;
4194       ivs->n_cands--;
4195       ivs->cand_cost -= cp->cand->cost;
4196
4197       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4198     }
4199
4200   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4201
4202   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4203   iv_ca_recount_cost (data, ivs);
4204 }
4205
4206 /* Add invariants in set INVS to set IVS.  */
4207
4208 static void
4209 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4210 {
4211   bitmap_iterator bi;
4212   unsigned iid;
4213
4214   if (!invs)
4215     return;
4216
4217   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4218     {
4219       ivs->n_invariant_uses[iid]++;
4220       if (ivs->n_invariant_uses[iid] == 1)
4221         ivs->n_regs++;
4222     }
4223 }
4224
4225 /* Set cost pair for USE in set IVS to CP.  */
4226
4227 static void
4228 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4229               struct iv_use *use, struct cost_pair *cp)
4230 {
4231   unsigned uid = use->id, cid;
4232
4233   if (ivs->cand_for_use[uid] == cp)
4234     return;
4235
4236   if (ivs->cand_for_use[uid])
4237     iv_ca_set_no_cp (data, ivs, use);
4238
4239   if (cp)
4240     {
4241       cid = cp->cand->id;
4242
4243       ivs->bad_uses--;
4244       ivs->cand_for_use[uid] = cp;
4245       ivs->n_cand_uses[cid]++;
4246       if (ivs->n_cand_uses[cid] == 1)
4247         {
4248           bitmap_set_bit (ivs->cands, cid);
4249           /* Do not count the pseudocandidates.  */
4250           if (cp->cand->iv)
4251             ivs->n_regs++;
4252           ivs->n_cands++;
4253           ivs->cand_cost += cp->cand->cost;
4254
4255           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4256         }
4257
4258       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4259       iv_ca_set_add_invariants (ivs, cp->depends_on);
4260       iv_ca_recount_cost (data, ivs);
4261     }
4262 }
4263
4264 /* Extend set IVS by expressing USE by some of the candidates in it
4265    if possible.  */
4266
4267 static void
4268 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4269                struct iv_use *use)
4270 {
4271   struct cost_pair *best_cp = NULL, *cp;
4272   bitmap_iterator bi;
4273   unsigned i;
4274
4275   gcc_assert (ivs->upto >= use->id);
4276
4277   if (ivs->upto == use->id)
4278     {
4279       ivs->upto++;
4280       ivs->bad_uses++;
4281     }
4282
4283   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4284     {
4285       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4286
4287       if (cheaper_cost_pair (cp, best_cp))
4288         best_cp = cp;
4289     }
4290
4291   iv_ca_set_cp (data, ivs, use, best_cp);
4292 }
4293
4294 /* Get cost for assignment IVS.  */
4295
4296 static comp_cost
4297 iv_ca_cost (struct iv_ca *ivs)
4298 {
4299   return (ivs->bad_uses ? infinite_cost : ivs->cost);
4300 }
4301
4302 /* Returns true if all dependences of CP are among invariants in IVS.  */
4303
4304 static bool
4305 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4306 {
4307   unsigned i;
4308   bitmap_iterator bi;
4309
4310   if (!cp->depends_on)
4311     return true;
4312
4313   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4314     {
4315       if (ivs->n_invariant_uses[i] == 0)
4316         return false;
4317     }
4318
4319   return true;
4320 }
4321
4322 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4323    it before NEXT_CHANGE.  */
4324
4325 static struct iv_ca_delta *
4326 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4327                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4328 {
4329   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4330
4331   change->use = use;
4332   change->old_cp = old_cp;
4333   change->new_cp = new_cp;
4334   change->next_change = next_change;
4335
4336   return change;
4337 }
4338
4339 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4340    are rewritten.  */
4341
4342 static struct iv_ca_delta *
4343 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4344 {
4345   struct iv_ca_delta *last;
4346
4347   if (!l2)
4348     return l1;
4349
4350   if (!l1)
4351     return l2;
4352
4353   for (last = l1; last->next_change; last = last->next_change)
4354     continue;
4355   last->next_change = l2;
4356
4357   return l1;
4358 }
4359
4360 /* Returns candidate by that USE is expressed in IVS.  */
4361
4362 static struct cost_pair *
4363 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4364 {
4365   return ivs->cand_for_use[use->id];
4366 }
4367
4368 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4369
4370 static struct iv_ca_delta *
4371 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4372 {
4373   struct iv_ca_delta *act, *next, *prev = NULL;
4374   struct cost_pair *tmp;
4375
4376   for (act = delta; act; act = next)
4377     {
4378       next = act->next_change;
4379       act->next_change = prev;
4380       prev = act;
4381
4382       tmp = act->old_cp;
4383       act->old_cp = act->new_cp;
4384       act->new_cp = tmp;
4385     }
4386
4387   return prev;
4388 }
4389
4390 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4391    reverted instead.  */
4392
4393 static void
4394 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4395                     struct iv_ca_delta *delta, bool forward)
4396 {
4397   struct cost_pair *from, *to;
4398   struct iv_ca_delta *act;
4399
4400   if (!forward)
4401     delta = iv_ca_delta_reverse (delta);
4402
4403   for (act = delta; act; act = act->next_change)
4404     {
4405       from = act->old_cp;
4406       to = act->new_cp;
4407       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4408       iv_ca_set_cp (data, ivs, act->use, to);
4409     }
4410
4411   if (!forward)
4412     iv_ca_delta_reverse (delta);
4413 }
4414
4415 /* Returns true if CAND is used in IVS.  */
4416
4417 static bool
4418 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4419 {
4420   return ivs->n_cand_uses[cand->id] > 0;
4421 }
4422
4423 /* Returns number of induction variable candidates in the set IVS.  */
4424
4425 static unsigned
4426 iv_ca_n_cands (struct iv_ca *ivs)
4427 {
4428   return ivs->n_cands;
4429 }
4430
4431 /* Free the list of changes DELTA.  */
4432
4433 static void
4434 iv_ca_delta_free (struct iv_ca_delta **delta)
4435 {
4436   struct iv_ca_delta *act, *next;
4437
4438   for (act = *delta; act; act = next)
4439     {
4440       next = act->next_change;
4441       free (act);
4442     }
4443
4444   *delta = NULL;
4445 }
4446
4447 /* Allocates new iv candidates assignment.  */
4448
4449 static struct iv_ca *
4450 iv_ca_new (struct ivopts_data *data)
4451 {
4452   struct iv_ca *nw = XNEW (struct iv_ca);
4453
4454   nw->upto = 0;
4455   nw->bad_uses = 0;
4456   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4457   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4458   nw->cands = BITMAP_ALLOC (NULL);
4459   nw->n_cands = 0;
4460   nw->n_regs = 0;
4461   nw->cand_use_cost = zero_cost;
4462   nw->cand_cost = 0;
4463   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4464   nw->cost = zero_cost;
4465
4466   return nw;
4467 }
4468
4469 /* Free memory occupied by the set IVS.  */
4470
4471 static void
4472 iv_ca_free (struct iv_ca **ivs)
4473 {
4474   free ((*ivs)->cand_for_use);
4475   free ((*ivs)->n_cand_uses);
4476   BITMAP_FREE ((*ivs)->cands);
4477   free ((*ivs)->n_invariant_uses);
4478   free (*ivs);
4479   *ivs = NULL;
4480 }
4481
4482 /* Dumps IVS to FILE.  */
4483
4484 static void
4485 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4486 {
4487   const char *pref = "  invariants ";
4488   unsigned i;
4489   comp_cost cost = iv_ca_cost (ivs);
4490
4491   fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
4492   bitmap_print (file, ivs->cands, "  candidates ","\n");
4493
4494   for (i = 1; i <= data->max_inv_id; i++)
4495     if (ivs->n_invariant_uses[i])
4496       {
4497         fprintf (file, "%s%d", pref, i);
4498         pref = ", ";
4499       }
4500   fprintf (file, "\n");
4501 }
4502
4503 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4504    new set, and store differences in DELTA.  Number of induction variables
4505    in the new set is stored to N_IVS.  */
4506
4507 static comp_cost
4508 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4509               struct iv_cand *cand, struct iv_ca_delta **delta,
4510               unsigned *n_ivs)
4511 {
4512   unsigned i;
4513   comp_cost cost;
4514   struct iv_use *use;
4515   struct cost_pair *old_cp, *new_cp;
4516
4517   *delta = NULL;
4518   for (i = 0; i < ivs->upto; i++)
4519     {
4520       use = iv_use (data, i);
4521       old_cp = iv_ca_cand_for_use (ivs, use);
4522
4523       if (old_cp
4524           && old_cp->cand == cand)
4525         continue;
4526
4527       new_cp = get_use_iv_cost (data, use, cand);
4528       if (!new_cp)
4529         continue;
4530
4531       if (!iv_ca_has_deps (ivs, new_cp))
4532         continue;
4533
4534       if (!cheaper_cost_pair (new_cp, old_cp))
4535         continue;
4536
4537       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4538     }
4539
4540   iv_ca_delta_commit (data, ivs, *delta, true);
4541   cost = iv_ca_cost (ivs);
4542   if (n_ivs)
4543     *n_ivs = iv_ca_n_cands (ivs);
4544   iv_ca_delta_commit (data, ivs, *delta, false);
4545
4546   return cost;
4547 }
4548
4549 /* Try narrowing set IVS by removing CAND.  Return the cost of
4550    the new set and store the differences in DELTA.  */
4551
4552 static comp_cost
4553 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4554               struct iv_cand *cand, struct iv_ca_delta **delta)
4555 {
4556   unsigned i, ci;
4557   struct iv_use *use;
4558   struct cost_pair *old_cp, *new_cp, *cp;
4559   bitmap_iterator bi;
4560   struct iv_cand *cnd;
4561   comp_cost cost;
4562
4563   *delta = NULL;
4564   for (i = 0; i < n_iv_uses (data); i++)
4565     {
4566       use = iv_use (data, i);
4567
4568       old_cp = iv_ca_cand_for_use (ivs, use);
4569       if (old_cp->cand != cand)
4570         continue;
4571
4572       new_cp = NULL;
4573
4574       if (data->consider_all_candidates)
4575         {
4576           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4577             {
4578               if (ci == cand->id)
4579                 continue;
4580
4581               cnd = iv_cand (data, ci);
4582
4583               cp = get_use_iv_cost (data, use, cnd);
4584               if (!cp)
4585                 continue;
4586               if (!iv_ca_has_deps (ivs, cp))
4587                 continue;
4588
4589               if (!cheaper_cost_pair (cp, new_cp))
4590                 continue;
4591
4592               new_cp = cp;
4593             }
4594         }
4595       else
4596         {
4597           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4598             {
4599               if (ci == cand->id)
4600                 continue;
4601
4602               cnd = iv_cand (data, ci);
4603
4604               cp = get_use_iv_cost (data, use, cnd);
4605               if (!cp)
4606                 continue;
4607               if (!iv_ca_has_deps (ivs, cp))
4608                 continue;
4609
4610               if (!cheaper_cost_pair (cp, new_cp))
4611                 continue;
4612
4613               new_cp = cp;
4614             }
4615         }
4616
4617       if (!new_cp)
4618         {
4619           iv_ca_delta_free (delta);
4620           return infinite_cost;
4621         }
4622
4623       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4624     }
4625
4626   iv_ca_delta_commit (data, ivs, *delta, true);
4627   cost = iv_ca_cost (ivs);
4628   iv_ca_delta_commit (data, ivs, *delta, false);
4629
4630   return cost;
4631 }
4632
4633 /* Try optimizing the set of candidates IVS by removing candidates different
4634    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4635    differences in DELTA.  */
4636
4637 static comp_cost
4638 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4639              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4640 {
4641   bitmap_iterator bi;
4642   struct iv_ca_delta *act_delta, *best_delta;
4643   unsigned i;
4644   comp_cost best_cost, acost;
4645   struct iv_cand *cand;
4646
4647   best_delta = NULL;
4648   best_cost = iv_ca_cost (ivs);
4649
4650   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4651     {
4652       cand = iv_cand (data, i);
4653
4654       if (cand == except_cand)
4655         continue;
4656
4657       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4658
4659       if (compare_costs (acost, best_cost) < 0)
4660         {
4661           best_cost = acost;
4662           iv_ca_delta_free (&best_delta);
4663           best_delta = act_delta;
4664         }
4665       else
4666         iv_ca_delta_free (&act_delta);
4667     }
4668
4669   if (!best_delta)
4670     {
4671       *delta = NULL;
4672       return best_cost;
4673     }
4674
4675   /* Recurse to possibly remove other unnecessary ivs.  */
4676   iv_ca_delta_commit (data, ivs, best_delta, true);
4677   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4678   iv_ca_delta_commit (data, ivs, best_delta, false);
4679   *delta = iv_ca_delta_join (best_delta, *delta);
4680   return best_cost;
4681 }
4682
4683 /* Tries to extend the sets IVS in the best possible way in order
4684    to express the USE.  */
4685
4686 static bool
4687 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4688                   struct iv_use *use)
4689 {
4690   comp_cost best_cost, act_cost;
4691   unsigned i;
4692   bitmap_iterator bi;
4693   struct iv_cand *cand;
4694   struct iv_ca_delta *best_delta = NULL, *act_delta;
4695   struct cost_pair *cp;
4696
4697   iv_ca_add_use (data, ivs, use);
4698   best_cost = iv_ca_cost (ivs);
4699
4700   cp = iv_ca_cand_for_use (ivs, use);
4701   if (cp)
4702     {
4703       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4704       iv_ca_set_no_cp (data, ivs, use);
4705     }
4706
4707   /* First try important candidates not based on any memory object.  Only if
4708      this fails, try the specific ones.  Rationale -- in loops with many
4709      variables the best choice often is to use just one generic biv.  If we
4710      added here many ivs specific to the uses, the optimization algorithm later
4711      would be likely to get stuck in a local minimum, thus causing us to create
4712      too many ivs.  The approach from few ivs to more seems more likely to be
4713      successful -- starting from few ivs, replacing an expensive use by a
4714      specific iv should always be a win.  */
4715   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4716     {
4717       cand = iv_cand (data, i);
4718
4719       if (cand->iv->base_object != NULL_TREE)
4720         continue;
4721
4722       if (iv_ca_cand_used_p (ivs, cand))
4723         continue;
4724
4725       cp = get_use_iv_cost (data, use, cand);
4726       if (!cp)
4727         continue;
4728
4729       iv_ca_set_cp (data, ivs, use, cp);
4730       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4731       iv_ca_set_no_cp (data, ivs, use);
4732       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4733
4734       if (compare_costs (act_cost, best_cost) < 0)
4735         {
4736           best_cost = act_cost;
4737
4738           iv_ca_delta_free (&best_delta);
4739           best_delta = act_delta;
4740         }
4741       else
4742         iv_ca_delta_free (&act_delta);
4743     }
4744
4745   if (infinite_cost_p (best_cost))
4746     {
4747       for (i = 0; i < use->n_map_members; i++)
4748         {
4749           cp = use->cost_map + i;
4750           cand = cp->cand;
4751           if (!cand)
4752             continue;
4753
4754           /* Already tried this.  */
4755           if (cand->important && cand->iv->base_object == NULL_TREE)
4756             continue;
4757
4758           if (iv_ca_cand_used_p (ivs, cand))
4759             continue;
4760
4761           act_delta = NULL;
4762           iv_ca_set_cp (data, ivs, use, cp);
4763           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4764           iv_ca_set_no_cp (data, ivs, use);
4765           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4766                                        cp, act_delta);
4767
4768           if (compare_costs (act_cost, best_cost) < 0)
4769             {
4770               best_cost = act_cost;
4771
4772               if (best_delta)
4773                 iv_ca_delta_free (&best_delta);
4774               best_delta = act_delta;
4775             }
4776           else
4777             iv_ca_delta_free (&act_delta);
4778         }
4779     }
4780
4781   iv_ca_delta_commit (data, ivs, best_delta, true);
4782   iv_ca_delta_free (&best_delta);
4783
4784   return !infinite_cost_p (best_cost);
4785 }
4786
4787 /* Finds an initial assignment of candidates to uses.  */
4788
4789 static struct iv_ca *
4790 get_initial_solution (struct ivopts_data *data)
4791 {
4792   struct iv_ca *ivs = iv_ca_new (data);
4793   unsigned i;
4794
4795   for (i = 0; i < n_iv_uses (data); i++)
4796     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4797       {
4798         iv_ca_free (&ivs);
4799         return NULL;
4800       }
4801
4802   return ivs;
4803 }
4804
4805 /* Tries to improve set of induction variables IVS.  */
4806
4807 static bool
4808 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4809 {
4810   unsigned i, n_ivs;
4811   comp_cost acost, best_cost = iv_ca_cost (ivs);
4812   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4813   struct iv_cand *cand;
4814
4815   /* Try extending the set of induction variables by one.  */
4816   for (i = 0; i < n_iv_cands (data); i++)
4817     {
4818       cand = iv_cand (data, i);
4819
4820       if (iv_ca_cand_used_p (ivs, cand))
4821         continue;
4822
4823       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4824       if (!act_delta)
4825         continue;
4826
4827       /* If we successfully added the candidate and the set is small enough,
4828          try optimizing it by removing other candidates.  */
4829       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4830         {
4831           iv_ca_delta_commit (data, ivs, act_delta, true);
4832           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4833           iv_ca_delta_commit (data, ivs, act_delta, false);
4834           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4835         }
4836
4837       if (compare_costs (acost, best_cost) < 0)
4838         {
4839           best_cost = acost;
4840           iv_ca_delta_free (&best_delta);
4841           best_delta = act_delta;
4842         }
4843       else
4844         iv_ca_delta_free (&act_delta);
4845     }
4846
4847   if (!best_delta)
4848     {
4849       /* Try removing the candidates from the set instead.  */
4850       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4851
4852       /* Nothing more we can do.  */
4853       if (!best_delta)
4854         return false;
4855     }
4856
4857   iv_ca_delta_commit (data, ivs, best_delta, true);
4858   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4859   iv_ca_delta_free (&best_delta);
4860   return true;
4861 }
4862
4863 /* Attempts to find the optimal set of induction variables.  We do simple
4864    greedy heuristic -- we try to replace at most one candidate in the selected
4865    solution and remove the unused ivs while this improves the cost.  */
4866
4867 static struct iv_ca *
4868 find_optimal_iv_set (struct ivopts_data *data)
4869 {
4870   unsigned i;
4871   struct iv_ca *set;
4872   struct iv_use *use;
4873
4874   /* Get the initial solution.  */
4875   set = get_initial_solution (data);
4876   if (!set)
4877     {
4878       if (dump_file && (dump_flags & TDF_DETAILS))
4879         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4880       return NULL;
4881     }
4882
4883   if (dump_file && (dump_flags & TDF_DETAILS))
4884     {
4885       fprintf (dump_file, "Initial set of candidates:\n");
4886       iv_ca_dump (data, dump_file, set);
4887     }
4888
4889   while (try_improve_iv_set (data, set))
4890     {
4891       if (dump_file && (dump_flags & TDF_DETAILS))
4892         {
4893           fprintf (dump_file, "Improved to:\n");
4894           iv_ca_dump (data, dump_file, set);
4895         }
4896     }
4897
4898   if (dump_file && (dump_flags & TDF_DETAILS))
4899     {
4900       comp_cost cost = iv_ca_cost (set);
4901       fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4902     }
4903
4904   for (i = 0; i < n_iv_uses (data); i++)
4905     {
4906       use = iv_use (data, i);
4907       use->selected = iv_ca_cand_for_use (set, use)->cand;
4908     }
4909
4910   return set;
4911 }
4912
4913 /* Creates a new induction variable corresponding to CAND.  */
4914
4915 static void
4916 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4917 {
4918   block_stmt_iterator incr_pos;
4919   tree base;
4920   bool after = false;
4921
4922   if (!cand->iv)
4923     return;
4924
4925   switch (cand->pos)
4926     {
4927     case IP_NORMAL:
4928       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4929       break;
4930
4931     case IP_END:
4932       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4933       after = true;
4934       break;
4935
4936     case IP_ORIGINAL:
4937       /* Mark that the iv is preserved.  */
4938       name_info (data, cand->var_before)->preserve_biv = true;
4939       name_info (data, cand->var_after)->preserve_biv = true;
4940
4941       /* Rewrite the increment so that it uses var_before directly.  */
4942       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4943
4944       return;
4945     }
4946
4947   gimple_add_tmp_var (cand->var_before);
4948   add_referenced_var (cand->var_before);
4949
4950   base = unshare_expr (cand->iv->base);
4951
4952   create_iv (base, unshare_expr (cand->iv->step),
4953              cand->var_before, data->current_loop,
4954              &incr_pos, after, &cand->var_before, &cand->var_after);
4955 }
4956
4957 /* Creates new induction variables described in SET.  */
4958
4959 static void
4960 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4961 {
4962   unsigned i;
4963   struct iv_cand *cand;
4964   bitmap_iterator bi;
4965
4966   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4967     {
4968       cand = iv_cand (data, i);
4969       create_new_iv (data, cand);
4970     }
4971 }
4972
4973 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4974    is true, remove also the ssa name defined by the statement.  */
4975
4976 static void
4977 remove_statement (tree stmt, bool including_defined_name)
4978 {
4979   if (TREE_CODE (stmt) == PHI_NODE)
4980     {
4981       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4982     }
4983   else
4984     {
4985       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4986
4987       bsi_remove (&bsi, true);
4988       release_defs (stmt);
4989     }
4990 }
4991
4992 /* Rewrites USE (definition of iv used in a nonlinear expression)
4993    using candidate CAND.  */
4994
4995 static void
4996 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4997                             struct iv_use *use, struct iv_cand *cand)
4998 {
4999   tree comp;
5000   tree op, tgt, ass;
5001   block_stmt_iterator bsi;
5002
5003   /* An important special case -- if we are asked to express value of
5004      the original iv by itself, just exit; there is no need to
5005      introduce a new computation (that might also need casting the
5006      variable to unsigned and back).  */
5007   if (cand->pos == IP_ORIGINAL
5008       && cand->incremented_at == use->stmt)
5009     {
5010       tree step, ctype, utype;
5011       enum tree_code incr_code = PLUS_EXPR;
5012
5013       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
5014       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
5015
5016       step = cand->iv->step;
5017       ctype = TREE_TYPE (step);
5018       utype = TREE_TYPE (cand->var_after);
5019       if (TREE_CODE (step) == NEGATE_EXPR)
5020         {
5021           incr_code = MINUS_EXPR;
5022           step = TREE_OPERAND (step, 0);
5023         }
5024
5025       /* Check whether we may leave the computation unchanged.
5026          This is the case only if it does not rely on other
5027          computations in the loop -- otherwise, the computation
5028          we rely upon may be removed in remove_unused_ivs,
5029          thus leading to ICE.  */
5030       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
5031       if (TREE_CODE (op) == PLUS_EXPR
5032           || TREE_CODE (op) == MINUS_EXPR
5033           || TREE_CODE (op) == POINTER_PLUS_EXPR)
5034         {
5035           if (TREE_OPERAND (op, 0) == cand->var_before)
5036             op = TREE_OPERAND (op, 1);
5037           else if (TREE_CODE (op) != MINUS_EXPR
5038                    && TREE_OPERAND (op, 1) == cand->var_before)
5039             op = TREE_OPERAND (op, 0);
5040           else
5041             op = NULL_TREE;
5042         }
5043       else
5044         op = NULL_TREE;
5045
5046       if (op
5047           && (TREE_CODE (op) == INTEGER_CST
5048               || operand_equal_p (op, step, 0)))
5049         return;
5050
5051       /* Otherwise, add the necessary computations to express
5052          the iv.  */
5053       op = fold_convert (ctype, cand->var_before);
5054       comp = fold_convert (utype,
5055                            build2 (incr_code, ctype, op,
5056                                    unshare_expr (step)));
5057     }
5058   else
5059     {
5060       comp = get_computation (data->current_loop, use, cand);
5061       gcc_assert (comp != NULL_TREE);
5062     }
5063
5064   switch (TREE_CODE (use->stmt))
5065     {
5066     case PHI_NODE:
5067       tgt = PHI_RESULT (use->stmt);
5068
5069       /* If we should keep the biv, do not replace it.  */
5070       if (name_info (data, tgt)->preserve_biv)
5071         return;
5072
5073       bsi = bsi_after_labels (bb_for_stmt (use->stmt));
5074       break;
5075
5076     case GIMPLE_MODIFY_STMT:
5077       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
5078       bsi = bsi_for_stmt (use->stmt);
5079       break;
5080
5081     default:
5082       gcc_unreachable ();
5083     }
5084
5085   op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5086                                  true, BSI_SAME_STMT);
5087
5088   if (TREE_CODE (use->stmt) == PHI_NODE)
5089     {
5090       ass = build_gimple_modify_stmt (tgt, op);
5091       bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
5092       remove_statement (use->stmt, false);
5093       SSA_NAME_DEF_STMT (tgt) = ass;
5094     }
5095   else
5096     GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
5097 }
5098
5099 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5100    for_each_index.  */
5101
5102 static bool
5103 idx_remove_ssa_names (tree base, tree *idx,
5104                       void *data ATTRIBUTE_UNUSED)
5105 {
5106   tree *op;
5107
5108   if (TREE_CODE (*idx) == SSA_NAME)
5109     *idx = SSA_NAME_VAR (*idx);
5110
5111   if (TREE_CODE (base) == ARRAY_REF)
5112     {
5113       op = &TREE_OPERAND (base, 2);
5114       if (*op
5115           && TREE_CODE (*op) == SSA_NAME)
5116         *op = SSA_NAME_VAR (*op);
5117       op = &TREE_OPERAND (base, 3);
5118       if (*op
5119           && TREE_CODE (*op) == SSA_NAME)
5120         *op = SSA_NAME_VAR (*op);
5121     }
5122
5123   return true;
5124 }
5125
5126 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5127
5128 static tree
5129 unshare_and_remove_ssa_names (tree ref)
5130 {
5131   ref = unshare_expr (ref);
5132   for_each_index (&ref, idx_remove_ssa_names, NULL);
5133
5134   return ref;
5135 }
5136
5137 /* Extract the alias analysis info for the memory reference REF.  There are
5138    several ways how this information may be stored and what precisely is
5139    its semantics depending on the type of the reference, but there always is
5140    somewhere hidden one _DECL node that is used to determine the set of
5141    virtual operands for the reference.  The code below deciphers this jungle
5142    and extracts this single useful piece of information.  */
5143
5144 static tree
5145 get_ref_tag (tree ref, tree orig)
5146 {
5147   tree var = get_base_address (ref);
5148   tree aref = NULL_TREE, tag, sv;
5149   HOST_WIDE_INT offset, size, maxsize;
5150
5151   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5152     {
5153       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5154       if (ref)
5155         break;
5156     }
5157
5158   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5159     return aref;
5160
5161   if (!var)
5162     return NULL_TREE;
5163
5164   if (TREE_CODE (var) == INDIRECT_REF)
5165     {
5166       /* If the base is a dereference of a pointer, first check its name memory
5167          tag.  If it does not have one, use its symbol memory tag.  */
5168       var = TREE_OPERAND (var, 0);
5169       if (TREE_CODE (var) != SSA_NAME)
5170         return NULL_TREE;
5171
5172       if (SSA_NAME_PTR_INFO (var))
5173         {
5174           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5175           if (tag)
5176             return tag;
5177         }
5178
5179       var = SSA_NAME_VAR (var);
5180       tag = symbol_mem_tag (var);
5181       gcc_assert (tag != NULL_TREE);
5182       return tag;
5183     }
5184   else
5185     {
5186       if (!DECL_P (var))
5187         return NULL_TREE;
5188
5189       tag = symbol_mem_tag (var);
5190       if (tag)
5191         return tag;
5192
5193       return var;
5194     }
5195 }
5196
5197 /* Copies the reference information from OLD_REF to NEW_REF.  */
5198
5199 static void
5200 copy_ref_info (tree new_ref, tree old_ref)
5201 {
5202   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5203     copy_mem_ref_info (new_ref, old_ref);
5204   else
5205     {
5206       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5207       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5208     }
5209 }
5210
5211 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5212
5213 static void
5214 rewrite_use_address (struct ivopts_data *data,
5215                      struct iv_use *use, struct iv_cand *cand)
5216 {
5217   aff_tree aff;
5218   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5219   tree ref;
5220   bool ok;
5221
5222   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5223   gcc_assert (ok);
5224   unshare_aff_combination (&aff);
5225
5226   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5227   copy_ref_info (ref, *use->op_p);
5228   *use->op_p = ref;
5229 }
5230
5231 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5232    candidate CAND.  */
5233
5234 static void
5235 rewrite_use_compare (struct ivopts_data *data,
5236                      struct iv_use *use, struct iv_cand *cand)
5237 {
5238   tree comp, *var_p, op, bound;
5239   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5240   enum tree_code compare;
5241   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5242   bool ok;
5243
5244   bound = cp->value;
5245   if (bound)
5246     {
5247       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5248       tree var_type = TREE_TYPE (var);
5249
5250       compare = iv_elimination_compare (data, use);
5251       bound = unshare_expr (fold_convert (var_type, bound));
5252       op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
5253                                      true, BSI_SAME_STMT);
5254
5255       *use->op_p = build2 (compare, boolean_type_node, var, op);
5256       return;
5257     }
5258
5259   /* The induction variable elimination failed; just express the original
5260      giv.  */
5261   comp = get_computation (data->current_loop, use, cand);
5262   gcc_assert (comp != NULL_TREE);
5263
5264   ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5265   gcc_assert (ok);
5266
5267   *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5268                                      true, BSI_SAME_STMT);
5269 }
5270
5271 /* Rewrites USE using candidate CAND.  */
5272
5273 static void
5274 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5275 {
5276   push_stmt_changes (&use->stmt);
5277
5278   switch (use->type)
5279     {
5280       case USE_NONLINEAR_EXPR:
5281         rewrite_use_nonlinear_expr (data, use, cand);
5282         break;
5283
5284       case USE_ADDRESS:
5285         rewrite_use_address (data, use, cand);
5286         break;
5287
5288       case USE_COMPARE:
5289         rewrite_use_compare (data, use, cand);
5290         break;
5291
5292       default:
5293         gcc_unreachable ();
5294     }
5295
5296   pop_stmt_changes (&use->stmt);
5297 }
5298
5299 /* Rewrite the uses using the selected induction variables.  */
5300
5301 static void
5302 rewrite_uses (struct ivopts_data *data)
5303 {
5304   unsigned i;
5305   struct iv_cand *cand;
5306   struct iv_use *use;
5307
5308   for (i = 0; i < n_iv_uses (data); i++)
5309     {
5310       use = iv_use (data, i);
5311       cand = use->selected;
5312       gcc_assert (cand);
5313
5314       rewrite_use (data, use, cand);
5315     }
5316 }
5317
5318 /* Removes the ivs that are not used after rewriting.  */
5319
5320 static void
5321 remove_unused_ivs (struct ivopts_data *data)
5322 {
5323   unsigned j;
5324   bitmap_iterator bi;
5325
5326   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5327     {
5328       struct version_info *info;
5329
5330       info = ver_info (data, j);
5331       if (info->iv
5332           && !integer_zerop (info->iv->step)
5333           && !info->inv_id
5334           && !info->iv->have_use_for
5335           && !info->preserve_biv)
5336         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5337     }
5338 }
5339
5340 /* Frees data allocated by the optimization of a single loop.  */
5341
5342 static void
5343 free_loop_data (struct ivopts_data *data)
5344 {
5345   unsigned i, j;
5346   bitmap_iterator bi;
5347   tree obj;
5348
5349   if (data->niters)
5350     {
5351       pointer_map_destroy (data->niters);
5352       data->niters = NULL;
5353     }
5354
5355   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5356     {
5357       struct version_info *info;
5358
5359       info = ver_info (data, i);
5360       if (info->iv)
5361         free (info->iv);
5362       info->iv = NULL;
5363       info->has_nonlin_use = false;
5364       info->preserve_biv = false;
5365       info->inv_id = 0;
5366     }
5367   bitmap_clear (data->relevant);
5368   bitmap_clear (data->important_candidates);
5369
5370   for (i = 0; i < n_iv_uses (data); i++)
5371     {
5372       struct iv_use *use = iv_use (data, i);
5373
5374       free (use->iv);
5375       BITMAP_FREE (use->related_cands);
5376       for (j = 0; j < use->n_map_members; j++)
5377         if (use->cost_map[j].depends_on)
5378           BITMAP_FREE (use->cost_map[j].depends_on);
5379       free (use->cost_map);
5380       free (use);
5381     }
5382   VEC_truncate (iv_use_p, data->iv_uses, 0);
5383
5384   for (i = 0; i < n_iv_cands (data); i++)
5385     {
5386       struct iv_cand *cand = iv_cand (data, i);
5387
5388       if (cand->iv)
5389         free (cand->iv);
5390       if (cand->depends_on)
5391         BITMAP_FREE (cand->depends_on);
5392       free (cand);
5393     }
5394   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5395
5396   if (data->version_info_size < num_ssa_names)
5397     {
5398       data->version_info_size = 2 * num_ssa_names;
5399       free (data->version_info);
5400       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5401     }
5402
5403   data->max_inv_id = 0;
5404
5405   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5406     SET_DECL_RTL (obj, NULL_RTX);
5407
5408   VEC_truncate (tree, decl_rtl_to_reset, 0);
5409 }
5410
5411 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5412    loop tree.  */
5413
5414 static void
5415 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5416 {
5417   free_loop_data (data);
5418   free (data->version_info);
5419   BITMAP_FREE (data->relevant);
5420   BITMAP_FREE (data->important_candidates);
5421
5422   VEC_free (tree, heap, decl_rtl_to_reset);
5423   VEC_free (iv_use_p, heap, data->iv_uses);
5424   VEC_free (iv_cand_p, heap, data->iv_candidates);
5425 }
5426
5427 /* Optimizes the LOOP.  Returns true if anything changed.  */
5428
5429 static bool
5430 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5431 {
5432   bool changed = false;
5433   struct iv_ca *iv_ca;
5434   edge exit;
5435
5436   gcc_assert (!data->niters);
5437   data->current_loop = loop;
5438
5439   if (dump_file && (dump_flags & TDF_DETAILS))
5440     {
5441       fprintf (dump_file, "Processing loop %d\n", loop->num);
5442
5443       exit = single_dom_exit (loop);
5444       if (exit)
5445         {
5446           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5447                    exit->src->index, exit->dest->index);
5448           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5449           fprintf (dump_file, "\n");
5450         }
5451
5452       fprintf (dump_file, "\n");
5453     }
5454
5455   /* For each ssa name determines whether it behaves as an induction variable
5456      in some loop.  */
5457   if (!find_induction_variables (data))
5458     goto finish;
5459
5460   /* Finds interesting uses (item 1).  */
5461   find_interesting_uses (data);
5462   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5463     goto finish;
5464
5465   /* Finds candidates for the induction variables (item 2).  */
5466   find_iv_candidates (data);
5467
5468   /* Calculates the costs (item 3, part 1).  */
5469   determine_use_iv_costs (data);
5470   determine_iv_costs (data);
5471   determine_set_costs (data);
5472
5473   /* Find the optimal set of induction variables (item 3, part 2).  */
5474   iv_ca = find_optimal_iv_set (data);
5475   if (!iv_ca)
5476     goto finish;
5477   changed = true;
5478
5479   /* Create the new induction variables (item 4, part 1).  */
5480   create_new_ivs (data, iv_ca);
5481   iv_ca_free (&iv_ca);
5482
5483   /* Rewrite the uses (item 4, part 2).  */
5484   rewrite_uses (data);
5485
5486   /* Remove the ivs that are unused after rewriting.  */
5487   remove_unused_ivs (data);
5488
5489   /* We have changed the structure of induction variables; it might happen
5490      that definitions in the scev database refer to some of them that were
5491      eliminated.  */
5492   scev_reset ();
5493
5494 finish:
5495   free_loop_data (data);
5496
5497   return changed;
5498 }
5499
5500 /* Main entry point.  Optimizes induction variables in loops.  */
5501
5502 void
5503 tree_ssa_iv_optimize (void)
5504 {
5505   struct loop *loop;
5506   struct ivopts_data data;
5507   loop_iterator li;
5508
5509   tree_ssa_iv_optimize_init (&data);
5510
5511   /* Optimize the loops starting with the innermost ones.  */
5512   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5513     {
5514       if (dump_file && (dump_flags & TDF_DETAILS))
5515         flow_loop_dump (loop, dump_file, NULL, 1);
5516
5517       tree_ssa_iv_optimize_loop (&data, loop);
5518     }
5519
5520   tree_ssa_iv_optimize_finalize (&data);
5521 }