gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19 02111-1307, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Information attached to loop.  */
 125 struct loop_data
 126 {
 127   unsigned regs_used;   /* Number of registers used.  */
 128 };
 129
 130 /* Types of uses.  */
 131 enum use_type
 132 {
 133   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 134   USE_OUTER,            /* The induction variable is used outside the loop.  */
 135   USE_ADDRESS,          /* Use in an address.  */
 136   USE_COMPARE           /* Use is a compare.  */
 137 };
 138
 139 /* The candidate - cost pair.  */
 140 struct cost_pair
 141 {
 142   struct iv_cand *cand; /* The candidate.  */
 143   unsigned cost;        /* The cost.  */
 144   bitmap depends_on;    /* The list of invariants that have to be
 145                            preserved.  */
 146   tree value;           /* For final value elimination, the expression for
 147                            the final value of the iv.  For iv elimination,
 148                            the new bound to compare with.  */
 149 };
 150
 151 /* Use.  */
 152 struct iv_use
 153 {
 154   unsigned id;          /* The id of the use.  */
 155   enum use_type type;   /* Type of the use.  */
 156   struct iv *iv;        /* The induction variable it is based on.  */
 157   tree stmt;            /* Statement in that it occurs.  */
 158   tree *op_p;           /* The place where it occurs.  */
 159   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 160                            important ones.  */
 161
 162   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 163   struct cost_pair *cost_map;
 164                         /* The costs wrto the iv candidates.  */
 165
 166   struct iv_cand *selected;
 167                         /* The selected candidate.  */
 168 };
 169
 170 /* The position where the iv is computed.  */
 171 enum iv_position
 172 {
 173   IP_NORMAL,            /* At the end, just before the exit condition.  */
 174   IP_END,               /* At the end of the latch block.  */
 175   IP_ORIGINAL           /* The original biv.  */
 176 };
 177
 178 /* The induction variable candidate.  */
 179 struct iv_cand
 180 {
 181   unsigned id;          /* The number of the candidate.  */
 182   bool important;       /* Whether this is an "important" candidate, i.e. such
 183                            that it should be considered by all uses.  */
 184   enum iv_position pos; /* Where it is computed.  */
 185   tree incremented_at;  /* For original biv, the statement where it is
 186                            incremented.  */
 187   tree var_before;      /* The variable used for it before increment.  */
 188   tree var_after;       /* The variable used for it after increment.  */
 189   struct iv *iv;        /* The value of the candidate.  NULL for
 190                            "pseudocandidate" used to indicate the possibility
 191                            to replace the final value of an iv by direct
 192                            computation of the value.  */
 193   unsigned cost;        /* Cost of the candidate.  */
 194 };
 195
 196 /* The data used by the induction variable optimizations.  */
 197
 198 typedef struct iv_use *iv_use_p;
 199 DEF_VEC_P(iv_use_p);
 200 DEF_VEC_ALLOC_P(iv_use_p,heap);
 201
 202 typedef struct iv_cand *iv_cand_p;
 203 DEF_VEC_P(iv_cand_p);
 204 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 205
 206 struct ivopts_data
 207 {
 208   /* The currently optimized loop.  */
 209   struct loop *current_loop;
 210
 211   /* Numbers of iterations for all exits of the current loop.  */
 212   htab_t niters;
 213
 214   /* The size of version_info array allocated.  */
 215   unsigned version_info_size;
 216
 217   /* The array of information for the ssa names.  */
 218   struct version_info *version_info;
 219
 220   /* The bitmap of indices in version_info whose value was changed.  */
 221   bitmap relevant;
 222
 223   /* The maximum invariant id.  */
 224   unsigned max_inv_id;
 225
 226   /* The uses of induction variables.  */
 227   VEC(iv_use_p,heap) *iv_uses;
 228
 229   /* The candidates.  */
 230   VEC(iv_cand_p,heap) *iv_candidates;
 231
 232   /* A bitmap of important candidates.  */
 233   bitmap important_candidates;
 234
 235   /* Whether to consider just related and important candidates when replacing a
 236      use.  */
 237   bool consider_all_candidates;
 238 };
 239
 240 /* An assignment of iv candidates to uses.  */
 241
 242 struct iv_ca
 243 {
 244   /* The number of uses covered by the assignment.  */
 245   unsigned upto;
 246
 247   /* Number of uses that cannot be expressed by the candidates in the set.  */
 248   unsigned bad_uses;
 249
 250   /* Candidate assigned to a use, together with the related costs.  */
 251   struct cost_pair **cand_for_use;
 252
 253   /* Number of times each candidate is used.  */
 254   unsigned *n_cand_uses;
 255
 256   /* The candidates used.  */
 257   bitmap cands;
 258
 259   /* The number of candidates in the set.  */
 260   unsigned n_cands;
 261
 262   /* Total number of registers needed.  */
 263   unsigned n_regs;
 264
 265   /* Total cost of expressing uses.  */
 266   unsigned cand_use_cost;
 267
 268   /* Total cost of candidates.  */
 269   unsigned cand_cost;
 270
 271   /* Number of times each invariant is used.  */
 272   unsigned *n_invariant_uses;
 273
 274   /* Total cost of the assignment.  */
 275   unsigned cost;
 276 };
 277
 278 /* Difference of two iv candidate assignments.  */
 279
 280 struct iv_ca_delta
 281 {
 282   /* Changed use.  */
 283   struct iv_use *use;
 284
 285   /* An old assignment (for rollback purposes).  */
 286   struct cost_pair *old_cp;
 287
 288   /* A new assignment.  */
 289   struct cost_pair *new_cp;
 290
 291   /* Next change in the list.  */
 292   struct iv_ca_delta *next_change;
 293 };
 294
 295 /* Bound on number of candidates below that all candidates are considered.  */
 296
 297 #define CONSIDER_ALL_CANDIDATES_BOUND \
 298   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 299
 300 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 301    optimizing such a loop would help, and it would take ages).  */
 302
 303 #define MAX_CONSIDERED_USES \
 304   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 305
 306 /* If there are at most this number of ivs in the set, try removing unnecessary
 307    ivs from the set always.  */
 308
 309 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 310   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 311
 312 /* The list of trees for that the decl_rtl field must be reset is stored
 313    here.  */
 314
 315 static VEC(tree,heap) *decl_rtl_to_reset;
 316
 317 /* Number of uses recorded in DATA.  */
 318
 319 static inline unsigned
 320 n_iv_uses (struct ivopts_data *data)
 321 {
 322   return VEC_length (iv_use_p, data->iv_uses);
 323 }
 324
 325 /* Ith use recorded in DATA.  */
 326
 327 static inline struct iv_use *
 328 iv_use (struct ivopts_data *data, unsigned i)
 329 {
 330   return VEC_index (iv_use_p, data->iv_uses, i);
 331 }
 332
 333 /* Number of candidates recorded in DATA.  */
 334
 335 static inline unsigned
 336 n_iv_cands (struct ivopts_data *data)
 337 {
 338   return VEC_length (iv_cand_p, data->iv_candidates);
 339 }
 340
 341 /* Ith candidate recorded in DATA.  */
 342
 343 static inline struct iv_cand *
 344 iv_cand (struct ivopts_data *data, unsigned i)
 345 {
 346   return VEC_index (iv_cand_p, data->iv_candidates, i);
 347 }
 348
 349 /* The data for LOOP.  */
 350
 351 static inline struct loop_data *
 352 loop_data (struct loop *loop)
 353 {
 354   return loop->aux;
 355 }
 356
 357 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 358
 359 static edge
 360 single_dom_exit (struct loop *loop)
 361 {
 362   edge exit = loop->single_exit;
 363
 364   if (!exit)
 365     return NULL;
 366
 367   if (!just_once_each_iteration_p (loop, exit->src))
 368     return NULL;
 369
 370   return exit;
 371 }
 372
 373 /* Dumps information about the induction variable IV to FILE.  */
 374
 375 extern void dump_iv (FILE *, struct iv *);
 376 void
 377 dump_iv (FILE *file, struct iv *iv)
 378 {
 379   if (iv->ssa_name)
 380     {
 381       fprintf (file, "ssa name ");
 382       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 383       fprintf (file, "\n");
 384     }
 385
 386   fprintf (file, "  type ");
 387   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 388   fprintf (file, "\n");
 389
 390   if (iv->step)
 391     {
 392       fprintf (file, "  base ");
 393       print_generic_expr (file, iv->base, TDF_SLIM);
 394       fprintf (file, "\n");
 395
 396       fprintf (file, "  step ");
 397       print_generic_expr (file, iv->step, TDF_SLIM);
 398       fprintf (file, "\n");
 399     }
 400   else
 401     {
 402       fprintf (file, "  invariant ");
 403       print_generic_expr (file, iv->base, TDF_SLIM);
 404       fprintf (file, "\n");
 405     }
 406
 407   if (iv->base_object)
 408     {
 409       fprintf (file, "  base object ");
 410       print_generic_expr (file, iv->base_object, TDF_SLIM);
 411       fprintf (file, "\n");
 412     }
 413
 414   if (iv->biv_p)
 415     fprintf (file, "  is a biv\n");
 416 }
 417
 418 /* Dumps information about the USE to FILE.  */
 419
 420 extern void dump_use (FILE *, struct iv_use *);
 421 void
 422 dump_use (FILE *file, struct iv_use *use)
 423 {
 424   fprintf (file, "use %d\n", use->id);
 425
 426   switch (use->type)
 427     {
 428     case USE_NONLINEAR_EXPR:
 429       fprintf (file, "  generic\n");
 430       break;
 431
 432     case USE_OUTER:
 433       fprintf (file, "  outside\n");
 434       break;
 435
 436     case USE_ADDRESS:
 437       fprintf (file, "  address\n");
 438       break;
 439
 440     case USE_COMPARE:
 441       fprintf (file, "  compare\n");
 442       break;
 443
 444     default:
 445       gcc_unreachable ();
 446     }
 447
 448   fprintf (file, "  in statement ");
 449   print_generic_expr (file, use->stmt, TDF_SLIM);
 450   fprintf (file, "\n");
 451
 452   fprintf (file, "  at position ");
 453   if (use->op_p)
 454     print_generic_expr (file, *use->op_p, TDF_SLIM);
 455   fprintf (file, "\n");
 456
 457   dump_iv (file, use->iv);
 458
 459   if (use->related_cands)
 460     {
 461       fprintf (file, "  related candidates ");
 462       dump_bitmap (file, use->related_cands);
 463     }
 464 }
 465
 466 /* Dumps information about the uses to FILE.  */
 467
 468 extern void dump_uses (FILE *, struct ivopts_data *);
 469 void
 470 dump_uses (FILE *file, struct ivopts_data *data)
 471 {
 472   unsigned i;
 473   struct iv_use *use;
 474
 475   for (i = 0; i < n_iv_uses (data); i++)
 476     {
 477       use = iv_use (data, i);
 478
 479       dump_use (file, use);
 480       fprintf (file, "\n");
 481     }
 482 }
 483
 484 /* Dumps information about induction variable candidate CAND to FILE.  */
 485
 486 extern void dump_cand (FILE *, struct iv_cand *);
 487 void
 488 dump_cand (FILE *file, struct iv_cand *cand)
 489 {
 490   struct iv *iv = cand->iv;
 491
 492   fprintf (file, "candidate %d%s\n",
 493            cand->id, cand->important ? " (important)" : "");
 494
 495   if (!iv)
 496     {
 497       fprintf (file, "  final value replacement\n");
 498       return;
 499     }
 500
 501   switch (cand->pos)
 502     {
 503     case IP_NORMAL:
 504       fprintf (file, "  incremented before exit test\n");
 505       break;
 506
 507     case IP_END:
 508       fprintf (file, "  incremented at end\n");
 509       break;
 510
 511     case IP_ORIGINAL:
 512       fprintf (file, "  original biv\n");
 513       break;
 514     }
 515
 516   dump_iv (file, iv);
 517 }
 518
 519 /* Returns the info for ssa version VER.  */
 520
 521 static inline struct version_info *
 522 ver_info (struct ivopts_data *data, unsigned ver)
 523 {
 524   return data->version_info + ver;
 525 }
 526
 527 /* Returns the info for ssa name NAME.  */
 528
 529 static inline struct version_info *
 530 name_info (struct ivopts_data *data, tree name)
 531 {
 532   return ver_info (data, SSA_NAME_VERSION (name));
 533 }
 534
 535 /* Checks whether there exists number X such that X * B = A, counting modulo
 536    2^BITS.  */
 537
 538 static bool
 539 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 540         HOST_WIDE_INT *x)
 541 {
 542   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 543   unsigned HOST_WIDE_INT inv, ex, val;
 544   unsigned i;
 545
 546   a &= mask;
 547   b &= mask;
 548
 549   /* First divide the whole equation by 2 as long as possible.  */
 550   while (!(a & 1) && !(b & 1))
 551     {
 552       a >>= 1;
 553       b >>= 1;
 554       bits--;
 555       mask >>= 1;
 556     }
 557
 558   if (!(b & 1))
 559     {
 560       /* If b is still even, a is odd and there is no such x.  */
 561       return false;
 562     }
 563
 564   /* Find the inverse of b.  We compute it as
 565      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 566   inv = 1;
 567   ex = b;
 568   for (i = 0; i < bits - 1; i++)
 569     {
 570       inv = (inv * ex) & mask;
 571       ex = (ex * ex) & mask;
 572     }
 573
 574   val = (a * inv) & mask;
 575
 576   gcc_assert (((val * b) & mask) == a);
 577
 578   if ((val >> (bits - 1)) & 1)
 579     val |= ~mask;
 580
 581   *x = val;
 582
 583   return true;
 584 }
 585
 586 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 587    emitted in LOOP.  */
 588
 589 static bool
 590 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 591 {
 592   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 593
 594   gcc_assert (bb);
 595
 596   if (sbb == loop->latch)
 597     return true;
 598
 599   if (sbb != bb)
 600     return false;
 601
 602   return stmt == last_stmt (bb);
 603 }
 604
 605 /* Returns true if STMT if after the place where the original induction
 606    variable CAND is incremented.  */
 607
 608 static bool
 609 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 610 {
 611   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 612   basic_block stmt_bb = bb_for_stmt (stmt);
 613   block_stmt_iterator bsi;
 614
 615   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 616     return false;
 617
 618   if (stmt_bb != cand_bb)
 619     return true;
 620
 621   /* Scan the block from the end, since the original ivs are usually
 622      incremented at the end of the loop body.  */
 623   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 624     {
 625       if (bsi_stmt (bsi) == cand->incremented_at)
 626         return false;
 627       if (bsi_stmt (bsi) == stmt)
 628         return true;
 629     }
 630 }
 631
 632 /* Returns true if STMT if after the place where the induction variable
 633    CAND is incremented in LOOP.  */
 634
 635 static bool
 636 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 637 {
 638   switch (cand->pos)
 639     {
 640     case IP_END:
 641       return false;
 642
 643     case IP_NORMAL:
 644       return stmt_after_ip_normal_pos (loop, stmt);
 645
 646     case IP_ORIGINAL:
 647       return stmt_after_ip_original_pos (cand, stmt);
 648
 649     default:
 650       gcc_unreachable ();
 651     }
 652 }
 653
 654 /* Element of the table in that we cache the numbers of iterations obtained
 655    from exits of the loop.  */
 656
 657 struct nfe_cache_elt
 658 {
 659   /* The edge for that the number of iterations is cached.  */
 660   edge exit;
 661
 662   /* True if the # of iterations was successfully determined.  */
 663   bool valid_p;
 664
 665   /* Description of # of iterations.  */
 666   struct tree_niter_desc niter;
 667 };
 668
 669 /* Hash function for nfe_cache_elt E.  */
 670
 671 static hashval_t
 672 nfe_hash (const void *e)
 673 {
 674   const struct nfe_cache_elt *elt = e;
 675
 676   return htab_hash_pointer (elt->exit);
 677 }
 678
 679 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 680
 681 static int
 682 nfe_eq (const void *e1, const void *e2)
 683 {
 684   const struct nfe_cache_elt *elt1 = e1;
 685
 686   return elt1->exit == e2;
 687 }
 688
 689 /*  Returns structure describing number of iterations determined from
 690     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 691
 692 static struct tree_niter_desc *
 693 niter_for_exit (struct ivopts_data *data, edge exit)
 694 {
 695   struct nfe_cache_elt *nfe_desc;
 696   PTR *slot;
 697
 698   slot = htab_find_slot_with_hash (data->niters, exit,
 699                                    htab_hash_pointer (exit),
 700                                    INSERT);
 701
 702   if (!*slot)
 703     {
 704       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 705       nfe_desc->exit = exit;
 706       nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
 707                                                      exit, &nfe_desc->niter);
 708       *slot = nfe_desc;
 709     }
 710   else
 711     nfe_desc = *slot;
 712
 713   if (!nfe_desc->valid_p)
 714     return NULL;
 715
 716   return &nfe_desc->niter;
 717 }
 718
 719 /* Returns structure describing number of iterations determined from
 720    single dominating exit of DATA->current_loop, or NULL if something
 721    goes wrong.  */
 722
 723 static struct tree_niter_desc *
 724 niter_for_single_dom_exit (struct ivopts_data *data)
 725 {
 726   edge exit = single_dom_exit (data->current_loop);
 727
 728   if (!exit)
 729     return NULL;
 730
 731   return niter_for_exit (data, exit);
 732 }
 733
 734 /* Initializes data structures used by the iv optimization pass, stored
 735    in DATA.  LOOPS is the loop tree.  */
 736
 737 static void
 738 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
 739 {
 740   unsigned i;
 741
 742   data->version_info_size = 2 * num_ssa_names;
 743   data->version_info = xcalloc (data->version_info_size,
 744                                 sizeof (struct version_info));
 745   data->relevant = BITMAP_ALLOC (NULL);
 746   data->important_candidates = BITMAP_ALLOC (NULL);
 747   data->max_inv_id = 0;
 748   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 749
 750   for (i = 1; i < loops->num; i++)
 751     if (loops->parray[i])
 752       loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
 753
 754   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 755   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 756   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 757 }
 758
 759 /* Returns a memory object to that EXPR points.  In case we are able to
 760    determine that it does not point to any such object, NULL is returned.  */
 761
 762 static tree
 763 determine_base_object (tree expr)
 764 {
 765   enum tree_code code = TREE_CODE (expr);
 766   tree base, obj, op0, op1;
 767
 768   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 769     return NULL_TREE;
 770
 771   switch (code)
 772     {
 773     case INTEGER_CST:
 774       return NULL_TREE;
 775
 776     case ADDR_EXPR:
 777       obj = TREE_OPERAND (expr, 0);
 778       base = get_base_address (obj);
 779
 780       if (!base)
 781         return expr;
 782
 783       if (TREE_CODE (base) == INDIRECT_REF)
 784         return determine_base_object (TREE_OPERAND (base, 0));
 785
 786       return fold (build1 (ADDR_EXPR, ptr_type_node, base));
 787
 788     case PLUS_EXPR:
 789     case MINUS_EXPR:
 790       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 791       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 792
 793       if (!op1)
 794         return op0;
 795
 796       if (!op0)
 797         return (code == PLUS_EXPR
 798                 ? op1
 799                 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
 800
 801       return fold (build (code, ptr_type_node, op0, op1));
 802
 803     case NOP_EXPR:
 804     case CONVERT_EXPR:
 805       return determine_base_object (TREE_OPERAND (expr, 0));
 806
 807     default:
 808       return fold_convert (ptr_type_node, expr);
 809     }
 810 }
 811
 812 /* Allocates an induction variable with given initial value BASE and step STEP
 813    for loop LOOP.  */
 814
 815 static struct iv *
 816 alloc_iv (tree base, tree step)
 817 {
 818   struct iv *iv = xcalloc (1, sizeof (struct iv));
 819
 820   if (step && integer_zerop (step))
 821     step = NULL_TREE;
 822
 823   iv->base = base;
 824   iv->base_object = determine_base_object (base);
 825   iv->step = step;
 826   iv->biv_p = false;
 827   iv->have_use_for = false;
 828   iv->use_id = 0;
 829   iv->ssa_name = NULL_TREE;
 830
 831   return iv;
 832 }
 833
 834 /* Sets STEP and BASE for induction variable IV.  */
 835
 836 static void
 837 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 838 {
 839   struct version_info *info = name_info (data, iv);
 840
 841   gcc_assert (!info->iv);
 842
 843   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 844   info->iv = alloc_iv (base, step);
 845   info->iv->ssa_name = iv;
 846 }
 847
 848 /* Finds induction variable declaration for VAR.  */
 849
 850 static struct iv *
 851 get_iv (struct ivopts_data *data, tree var)
 852 {
 853   basic_block bb;
 854
 855   if (!name_info (data, var)->iv)
 856     {
 857       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 858
 859       if (!bb
 860           || !flow_bb_inside_loop_p (data->current_loop, bb))
 861         set_iv (data, var, var, NULL_TREE);
 862     }
 863
 864   return name_info (data, var)->iv;
 865 }
 866
 867 /* Determines the step of a biv defined in PHI.  */
 868
 869 static tree
 870 determine_biv_step (tree phi)
 871 {
 872   struct loop *loop = bb_for_stmt (phi)->loop_father;
 873   tree name = PHI_RESULT (phi), base, step;
 874   tree type = TREE_TYPE (name);
 875
 876   if (!is_gimple_reg (name))
 877     return NULL_TREE;
 878
 879   if (!simple_iv (loop, phi, name, &base, &step))
 880     return NULL_TREE;
 881
 882   if (!step)
 883     return build_int_cst (type, 0);
 884
 885   return step;
 886 }
 887
 888 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 889
 890 static bool
 891 abnormal_ssa_name_p (tree exp)
 892 {
 893   if (!exp)
 894     return false;
 895
 896   if (TREE_CODE (exp) != SSA_NAME)
 897     return false;
 898
 899   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 900 }
 901
 902 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 903    abnormal phi node.  Callback for for_each_index.  */
 904
 905 static bool
 906 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 907                                   void *data ATTRIBUTE_UNUSED)
 908 {
 909   if (TREE_CODE (base) == ARRAY_REF)
 910     {
 911       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 912         return false;
 913       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 914         return false;
 915     }
 916
 917   return !abnormal_ssa_name_p (*index);
 918 }
 919
 920 /* Returns true if EXPR contains a ssa name that occurs in an
 921    abnormal phi node.  */
 922
 923 static bool
 924 contains_abnormal_ssa_name_p (tree expr)
 925 {
 926   enum tree_code code = TREE_CODE (expr);
 927   enum tree_code_class class = TREE_CODE_CLASS (code);
 928
 929   if (code == SSA_NAME)
 930     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 931
 932   if (code == INTEGER_CST
 933       || is_gimple_min_invariant (expr))
 934     return false;
 935
 936   if (code == ADDR_EXPR)
 937     return !for_each_index (&TREE_OPERAND (expr, 0),
 938                             idx_contains_abnormal_ssa_name_p,
 939                             NULL);
 940
 941   switch (class)
 942     {
 943     case tcc_binary:
 944     case tcc_comparison:
 945       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 946         return true;
 947
 948       /* Fallthru.  */
 949     case tcc_unary:
 950       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 951         return true;
 952
 953       break;
 954
 955     default:
 956       gcc_unreachable ();
 957     }
 958
 959   return false;
 960 }
 961
 962 /* Finds basic ivs.  */
 963
 964 static bool
 965 find_bivs (struct ivopts_data *data)
 966 {
 967   tree phi, step, type, base;
 968   bool found = false;
 969   struct loop *loop = data->current_loop;
 970
 971   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 972     {
 973       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 974         continue;
 975
 976       step = determine_biv_step (phi);
 977
 978       if (!step)
 979         continue;
 980       if (cst_and_fits_in_hwi (step)
 981           && int_cst_value (step) == 0)
 982         continue;
 983
 984       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 985       if (contains_abnormal_ssa_name_p (base))
 986         continue;
 987
 988       type = TREE_TYPE (PHI_RESULT (phi));
 989       base = fold_convert (type, base);
 990       step = fold_convert (type, step);
 991
 992       /* FIXME: We do not handle induction variables whose step does
 993          not satisfy cst_and_fits_in_hwi.  */
 994       if (!cst_and_fits_in_hwi (step))
 995         continue;
 996
 997       set_iv (data, PHI_RESULT (phi), base, step);
 998       found = true;
 999     }
1000
1001   return found;
1002 }
1003
1004 /* Marks basic ivs.  */
1005
1006 static void
1007 mark_bivs (struct ivopts_data *data)
1008 {
1009   tree phi, var;
1010   struct iv *iv, *incr_iv;
1011   struct loop *loop = data->current_loop;
1012   basic_block incr_bb;
1013
1014   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1015     {
1016       iv = get_iv (data, PHI_RESULT (phi));
1017       if (!iv)
1018         continue;
1019
1020       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1021       incr_iv = get_iv (data, var);
1022       if (!incr_iv)
1023         continue;
1024
1025       /* If the increment is in the subloop, ignore it.  */
1026       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1027       if (incr_bb->loop_father != data->current_loop
1028           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1029         continue;
1030
1031       iv->biv_p = true;
1032       incr_iv->biv_p = true;
1033     }
1034 }
1035
1036 /* Checks whether STMT defines a linear induction variable and stores its
1037    parameters to BASE and STEP.  */
1038
1039 static bool
1040 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1041                         tree *base, tree *step)
1042 {
1043   tree lhs;
1044   struct loop *loop = data->current_loop;
1045
1046   *base = NULL_TREE;
1047   *step = NULL_TREE;
1048
1049   if (TREE_CODE (stmt) != MODIFY_EXPR)
1050     return false;
1051
1052   lhs = TREE_OPERAND (stmt, 0);
1053   if (TREE_CODE (lhs) != SSA_NAME)
1054     return false;
1055
1056   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
1057     return false;
1058
1059   /* FIXME: We do not handle induction variables whose step does
1060      not satisfy cst_and_fits_in_hwi.  */
1061   if (!zero_p (*step)
1062       && !cst_and_fits_in_hwi (*step))
1063     return false;
1064
1065   if (contains_abnormal_ssa_name_p (*base))
1066     return false;
1067
1068   return true;
1069 }
1070
1071 /* Finds general ivs in statement STMT.  */
1072
1073 static void
1074 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1075 {
1076   tree base, step;
1077
1078   if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1079     return;
1080
1081   set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1082 }
1083
1084 /* Finds general ivs in basic block BB.  */
1085
1086 static void
1087 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1088 {
1089   block_stmt_iterator bsi;
1090
1091   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1092     find_givs_in_stmt (data, bsi_stmt (bsi));
1093 }
1094
1095 /* Finds general ivs.  */
1096
1097 static void
1098 find_givs (struct ivopts_data *data)
1099 {
1100   struct loop *loop = data->current_loop;
1101   basic_block *body = get_loop_body_in_dom_order (loop);
1102   unsigned i;
1103
1104   for (i = 0; i < loop->num_nodes; i++)
1105     find_givs_in_bb (data, body[i]);
1106   free (body);
1107 }
1108
1109 /* For each ssa name defined in LOOP determines whether it is an induction
1110    variable and if so, its initial value and step.  */
1111
1112 static bool
1113 find_induction_variables (struct ivopts_data *data)
1114 {
1115   unsigned i;
1116   bitmap_iterator bi;
1117
1118   if (!find_bivs (data))
1119     return false;
1120
1121   find_givs (data);
1122   mark_bivs (data);
1123
1124   if (dump_file && (dump_flags & TDF_DETAILS))
1125     {
1126       struct tree_niter_desc *niter;
1127
1128       niter = niter_for_single_dom_exit (data);
1129
1130       if (niter)
1131         {
1132           fprintf (dump_file, "  number of iterations ");
1133           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1134           fprintf (dump_file, "\n");
1135
1136           fprintf (dump_file, "  may be zero if ");
1137           print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1138           fprintf (dump_file, "\n");
1139           fprintf (dump_file, "\n");
1140         };
1141
1142       fprintf (dump_file, "Induction variables:\n\n");
1143
1144       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1145         {
1146           if (ver_info (data, i)->iv)
1147             dump_iv (dump_file, ver_info (data, i)->iv);
1148         }
1149     }
1150
1151   return true;
1152 }
1153
1154 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1155
1156 static struct iv_use *
1157 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1158             tree stmt, enum use_type use_type)
1159 {
1160   struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1161
1162   use->id = n_iv_uses (data);
1163   use->type = use_type;
1164   use->iv = iv;
1165   use->stmt = stmt;
1166   use->op_p = use_p;
1167   use->related_cands = BITMAP_ALLOC (NULL);
1168
1169   /* To avoid showing ssa name in the dumps, if it was not reset by the
1170      caller.  */
1171   iv->ssa_name = NULL_TREE;
1172
1173   if (dump_file && (dump_flags & TDF_DETAILS))
1174     dump_use (dump_file, use);
1175
1176   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1177
1178   return use;
1179 }
1180
1181 /* Checks whether OP is a loop-level invariant and if so, records it.
1182    NONLINEAR_USE is true if the invariant is used in a way we do not
1183    handle specially.  */
1184
1185 static void
1186 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1187 {
1188   basic_block bb;
1189   struct version_info *info;
1190
1191   if (TREE_CODE (op) != SSA_NAME
1192       || !is_gimple_reg (op))
1193     return;
1194
1195   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1196   if (bb
1197       && flow_bb_inside_loop_p (data->current_loop, bb))
1198     return;
1199
1200   info = name_info (data, op);
1201   info->name = op;
1202   info->has_nonlin_use |= nonlinear_use;
1203   if (!info->inv_id)
1204     info->inv_id = ++data->max_inv_id;
1205   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1206 }
1207
1208 /* Checks whether the use OP is interesting and if so, records it
1209    as TYPE.  */
1210
1211 static struct iv_use *
1212 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1213                                        enum use_type type)
1214 {
1215   struct iv *iv;
1216   struct iv *civ;
1217   tree stmt;
1218   struct iv_use *use;
1219
1220   if (TREE_CODE (op) != SSA_NAME)
1221     return NULL;
1222
1223   iv = get_iv (data, op);
1224   if (!iv)
1225     return NULL;
1226
1227   if (iv->have_use_for)
1228     {
1229       use = iv_use (data, iv->use_id);
1230
1231       gcc_assert (use->type == USE_NONLINEAR_EXPR
1232                   || use->type == USE_OUTER);
1233
1234       if (type == USE_NONLINEAR_EXPR)
1235         use->type = USE_NONLINEAR_EXPR;
1236       return use;
1237     }
1238
1239   if (zero_p (iv->step))
1240     {
1241       record_invariant (data, op, true);
1242       return NULL;
1243     }
1244   iv->have_use_for = true;
1245
1246   civ = xmalloc (sizeof (struct iv));
1247   *civ = *iv;
1248
1249   stmt = SSA_NAME_DEF_STMT (op);
1250   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1251               || TREE_CODE (stmt) == MODIFY_EXPR);
1252
1253   use = record_use (data, NULL, civ, stmt, type);
1254   iv->use_id = use->id;
1255
1256   return use;
1257 }
1258
1259 /* Checks whether the use OP is interesting and if so, records it.  */
1260
1261 static struct iv_use *
1262 find_interesting_uses_op (struct ivopts_data *data, tree op)
1263 {
1264   return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1265 }
1266
1267 /* Records a definition of induction variable OP that is used outside of the
1268    loop.  */
1269
1270 static struct iv_use *
1271 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1272 {
1273   return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1274 }
1275
1276 /* Checks whether the condition *COND_P in STMT is interesting
1277    and if so, records it.  */
1278
1279 static void
1280 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1281 {
1282   tree *op0_p;
1283   tree *op1_p;
1284   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1285   struct iv const_iv;
1286   tree zero = integer_zero_node;
1287
1288   const_iv.step = NULL_TREE;
1289
1290   if (TREE_CODE (*cond_p) != SSA_NAME
1291       && !COMPARISON_CLASS_P (*cond_p))
1292     return;
1293
1294   if (TREE_CODE (*cond_p) == SSA_NAME)
1295     {
1296       op0_p = cond_p;
1297       op1_p = &zero;
1298     }
1299   else
1300     {
1301       op0_p = &TREE_OPERAND (*cond_p, 0);
1302       op1_p = &TREE_OPERAND (*cond_p, 1);
1303     }
1304
1305   if (TREE_CODE (*op0_p) == SSA_NAME)
1306     iv0 = get_iv (data, *op0_p);
1307   else
1308     iv0 = &const_iv;
1309
1310   if (TREE_CODE (*op1_p) == SSA_NAME)
1311     iv1 = get_iv (data, *op1_p);
1312   else
1313     iv1 = &const_iv;
1314
1315   if (/* When comparing with non-invariant value, we may not do any senseful
1316          induction variable elimination.  */
1317       (!iv0 || !iv1)
1318       /* Eliminating condition based on two ivs would be nontrivial.
1319          ??? TODO -- it is not really important to handle this case.  */
1320       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1321     {
1322       find_interesting_uses_op (data, *op0_p);
1323       find_interesting_uses_op (data, *op1_p);
1324       return;
1325     }
1326
1327   if (zero_p (iv0->step) && zero_p (iv1->step))
1328     {
1329       /* If both are invariants, this is a work for unswitching.  */
1330       return;
1331     }
1332
1333   civ = xmalloc (sizeof (struct iv));
1334   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1335   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1336 }
1337
1338 /* Returns true if expression EXPR is obviously invariant in LOOP,
1339    i.e. if all its operands are defined outside of the LOOP.  */
1340
1341 bool
1342 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1343 {
1344   basic_block def_bb;
1345   unsigned i, len;
1346
1347   if (is_gimple_min_invariant (expr))
1348     return true;
1349
1350   if (TREE_CODE (expr) == SSA_NAME)
1351     {
1352       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1353       if (def_bb
1354           && flow_bb_inside_loop_p (loop, def_bb))
1355         return false;
1356
1357       return true;
1358     }
1359
1360   if (!EXPR_P (expr))
1361     return false;
1362
1363   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1364   for (i = 0; i < len; i++)
1365     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1366       return false;
1367
1368   return true;
1369 }
1370
1371 /* Cumulates the steps of indices into DATA and replaces their values with the
1372    initial ones.  Returns false when the value of the index cannot be determined.
1373    Callback for for_each_index.  */
1374
1375 struct ifs_ivopts_data
1376 {
1377   struct ivopts_data *ivopts_data;
1378   tree stmt;
1379   tree *step_p;
1380 };
1381
1382 static bool
1383 idx_find_step (tree base, tree *idx, void *data)
1384 {
1385   struct ifs_ivopts_data *dta = data;
1386   struct iv *iv;
1387   tree step, type, iv_type, iv_step, lbound, off;
1388   struct loop *loop = dta->ivopts_data->current_loop;
1389
1390   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1391       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1392     return false;
1393
1394   /* If base is a component ref, require that the offset of the reference
1395      be invariant.  */
1396   if (TREE_CODE (base) == COMPONENT_REF)
1397     {
1398       off = component_ref_field_offset (base);
1399       return expr_invariant_in_loop_p (loop, off);
1400     }
1401
1402   /* If base is array, first check whether we will be able to move the
1403      reference out of the loop (in order to take its address in strength
1404      reduction).  In order for this to work we need both lower bound
1405      and step to be loop invariants.  */
1406   if (TREE_CODE (base) == ARRAY_REF)
1407     {
1408       step = array_ref_element_size (base);
1409       lbound = array_ref_low_bound (base);
1410
1411       if (!expr_invariant_in_loop_p (loop, step)
1412           || !expr_invariant_in_loop_p (loop, lbound))
1413         return false;
1414     }
1415
1416   if (TREE_CODE (*idx) != SSA_NAME)
1417     return true;
1418
1419   iv = get_iv (dta->ivopts_data, *idx);
1420   if (!iv)
1421     return false;
1422
1423   *idx = iv->base;
1424
1425   if (!iv->step)
1426     return true;
1427
1428   iv_type = TREE_TYPE (iv->base);
1429   type = build_pointer_type (TREE_TYPE (base));
1430   if (TREE_CODE (base) == ARRAY_REF)
1431     {
1432       step = array_ref_element_size (base);
1433
1434       /* We only handle addresses whose step is an integer constant.  */
1435       if (TREE_CODE (step) != INTEGER_CST)
1436         return false;
1437     }
1438   else
1439     /* The step for pointer arithmetics already is 1 byte.  */
1440     step = build_int_cst (type, 1);
1441
1442   if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1443     iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1444                                           type, iv->base, iv->step, dta->stmt);
1445   else
1446     iv_step = fold_convert (iv_type, iv->step);
1447
1448   if (!iv_step)
1449     {
1450       /* The index might wrap.  */
1451       return false;
1452     }
1453
1454   step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1455
1456   if (!*dta->step_p)
1457     *dta->step_p = step;
1458   else
1459     *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1460                                             *dta->step_p, step);
1461
1462   return true;
1463 }
1464
1465 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1466    object is passed to it in DATA.  */
1467
1468 static bool
1469 idx_record_use (tree base, tree *idx,
1470                 void *data)
1471 {
1472   find_interesting_uses_op (data, *idx);
1473   if (TREE_CODE (base) == ARRAY_REF)
1474     {
1475       find_interesting_uses_op (data, array_ref_element_size (base));
1476       find_interesting_uses_op (data, array_ref_low_bound (base));
1477     }
1478   return true;
1479 }
1480
1481 /* Returns true if memory reference REF may be unaligned.  */
1482
1483 static bool
1484 may_be_unaligned_p (tree ref)
1485 {
1486   tree base;
1487   tree base_type;
1488   HOST_WIDE_INT bitsize;
1489   HOST_WIDE_INT bitpos;
1490   tree toffset;
1491   enum machine_mode mode;
1492   int unsignedp, volatilep;
1493   unsigned base_align;
1494
1495   /* The test below is basically copy of what expr.c:normal_inner_ref
1496      does to check whether the object must be loaded by parts when
1497      STRICT_ALIGNMENT is true.  */
1498   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1499                               &unsignedp, &volatilep, true);
1500   base_type = TREE_TYPE (base);
1501   base_align = TYPE_ALIGN (base_type);
1502
1503   if (mode != BLKmode
1504       && (base_align < GET_MODE_ALIGNMENT (mode)
1505           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1506           || bitpos % BITS_PER_UNIT != 0))
1507     return true;
1508
1509   return false;
1510 }
1511
1512 /* Finds addresses in *OP_P inside STMT.  */
1513
1514 static void
1515 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1516 {
1517   tree base = unshare_expr (*op_p), step = NULL;
1518   struct iv *civ;
1519   struct ifs_ivopts_data ifs_ivopts_data;
1520
1521   /* Ignore bitfields for now.  Not really something terribly complicated
1522      to handle.  TODO.  */
1523   if (TREE_CODE (base) == COMPONENT_REF
1524       && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1525     goto fail;
1526
1527   if (STRICT_ALIGNMENT
1528       && may_be_unaligned_p (base))
1529     goto fail;
1530
1531   ifs_ivopts_data.ivopts_data = data;
1532   ifs_ivopts_data.stmt = stmt;
1533   ifs_ivopts_data.step_p = &step;
1534   if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1535       || zero_p (step))
1536     goto fail;
1537
1538   gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1539   gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1540
1541   if (TREE_CODE (base) == INDIRECT_REF)
1542     base = TREE_OPERAND (base, 0);
1543   else
1544     base = build_addr (base);
1545
1546   civ = alloc_iv (base, step);
1547   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1548   return;
1549
1550 fail:
1551   for_each_index (op_p, idx_record_use, data);
1552 }
1553
1554 /* Finds and records invariants used in STMT.  */
1555
1556 static void
1557 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1558 {
1559   use_optype uses = NULL;
1560   unsigned i, n;
1561   tree op;
1562
1563   if (TREE_CODE (stmt) == PHI_NODE)
1564     n = PHI_NUM_ARGS (stmt);
1565   else
1566     {
1567       uses = STMT_USE_OPS (stmt);
1568       n = NUM_USES (uses);
1569     }
1570
1571   for (i = 0; i < n; i++)
1572     {
1573       if (TREE_CODE (stmt) == PHI_NODE)
1574         op = PHI_ARG_DEF (stmt, i);
1575       else
1576         op = USE_OP (uses, i);
1577
1578       record_invariant (data, op, false);
1579     }
1580 }
1581
1582 /* Finds interesting uses of induction variables in the statement STMT.  */
1583
1584 static void
1585 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1586 {
1587   struct iv *iv;
1588   tree op, lhs, rhs;
1589   use_optype uses = NULL;
1590   unsigned i, n;
1591
1592   find_invariants_stmt (data, stmt);
1593
1594   if (TREE_CODE (stmt) == COND_EXPR)
1595     {
1596       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1597       return;
1598     }
1599
1600   if (TREE_CODE (stmt) == MODIFY_EXPR)
1601     {
1602       lhs = TREE_OPERAND (stmt, 0);
1603       rhs = TREE_OPERAND (stmt, 1);
1604
1605       if (TREE_CODE (lhs) == SSA_NAME)
1606         {
1607           /* If the statement defines an induction variable, the uses are not
1608              interesting by themselves.  */
1609
1610           iv = get_iv (data, lhs);
1611
1612           if (iv && !zero_p (iv->step))
1613             return;
1614         }
1615
1616       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1617         {
1618         case tcc_comparison:
1619           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1620           return;
1621
1622         case tcc_reference:
1623           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1624           if (REFERENCE_CLASS_P (lhs))
1625             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1626           return;
1627
1628         default: ;
1629         }
1630
1631       if (REFERENCE_CLASS_P (lhs)
1632           && is_gimple_val (rhs))
1633         {
1634           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1635           find_interesting_uses_op (data, rhs);
1636           return;
1637         }
1638
1639       /* TODO -- we should also handle address uses of type
1640
1641          memory = call (whatever);
1642
1643          and
1644
1645          call (memory).  */
1646     }
1647
1648   if (TREE_CODE (stmt) == PHI_NODE
1649       && bb_for_stmt (stmt) == data->current_loop->header)
1650     {
1651       lhs = PHI_RESULT (stmt);
1652       iv = get_iv (data, lhs);
1653
1654       if (iv && !zero_p (iv->step))
1655         return;
1656     }
1657
1658   if (TREE_CODE (stmt) == PHI_NODE)
1659     n = PHI_NUM_ARGS (stmt);
1660   else
1661     {
1662       uses = STMT_USE_OPS (stmt);
1663       n = NUM_USES (uses);
1664     }
1665
1666   for (i = 0; i < n; i++)
1667     {
1668       if (TREE_CODE (stmt) == PHI_NODE)
1669         op = PHI_ARG_DEF (stmt, i);
1670       else
1671         op = USE_OP (uses, i);
1672
1673       if (TREE_CODE (op) != SSA_NAME)
1674         continue;
1675
1676       iv = get_iv (data, op);
1677       if (!iv)
1678         continue;
1679
1680       find_interesting_uses_op (data, op);
1681     }
1682 }
1683
1684 /* Finds interesting uses of induction variables outside of loops
1685    on loop exit edge EXIT.  */
1686
1687 static void
1688 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1689 {
1690   tree phi, def;
1691
1692   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1693     {
1694       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1695       find_interesting_uses_outer (data, def);
1696     }
1697 }
1698
1699 /* Finds uses of the induction variables that are interesting.  */
1700
1701 static void
1702 find_interesting_uses (struct ivopts_data *data)
1703 {
1704   basic_block bb;
1705   block_stmt_iterator bsi;
1706   tree phi;
1707   basic_block *body = get_loop_body (data->current_loop);
1708   unsigned i;
1709   struct version_info *info;
1710   edge e;
1711
1712   if (dump_file && (dump_flags & TDF_DETAILS))
1713     fprintf (dump_file, "Uses:\n\n");
1714
1715   for (i = 0; i < data->current_loop->num_nodes; i++)
1716     {
1717       edge_iterator ei;
1718       bb = body[i];
1719
1720       FOR_EACH_EDGE (e, ei, bb->succs)
1721         if (e->dest != EXIT_BLOCK_PTR
1722             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1723           find_interesting_uses_outside (data, e);
1724
1725       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1726         find_interesting_uses_stmt (data, phi);
1727       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1728         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1729     }
1730
1731   if (dump_file && (dump_flags & TDF_DETAILS))
1732     {
1733       bitmap_iterator bi;
1734
1735       fprintf (dump_file, "\n");
1736
1737       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1738         {
1739           info = ver_info (data, i);
1740           if (info->inv_id)
1741             {
1742               fprintf (dump_file, "  ");
1743               print_generic_expr (dump_file, info->name, TDF_SLIM);
1744               fprintf (dump_file, " is invariant (%d)%s\n",
1745                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1746             }
1747         }
1748
1749       fprintf (dump_file, "\n");
1750     }
1751
1752   free (body);
1753 }
1754
1755 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1756    is true, assume we are inside an address.  */
1757
1758 static tree
1759 strip_offset (tree expr, bool inside_addr, unsigned HOST_WIDE_INT *offset)
1760 {
1761   tree op0 = NULL_TREE, op1 = NULL_TREE, step;
1762   enum tree_code code;
1763   tree type, orig_type = TREE_TYPE (expr);
1764   unsigned HOST_WIDE_INT off0, off1, st;
1765   tree orig_expr = expr;
1766
1767   STRIP_NOPS (expr);
1768   type = TREE_TYPE (expr);
1769   code = TREE_CODE (expr);
1770   *offset = 0;
1771
1772   switch (code)
1773     {
1774     case INTEGER_CST:
1775       if (!cst_and_fits_in_hwi (expr)
1776           || zero_p (expr))
1777         return orig_expr;
1778
1779       *offset = int_cst_value (expr);
1780       return build_int_cst_type (orig_type, 0);
1781
1782     case PLUS_EXPR:
1783     case MINUS_EXPR:
1784       op0 = TREE_OPERAND (expr, 0);
1785       op1 = TREE_OPERAND (expr, 1);
1786
1787       op0 = strip_offset (op0, false, &off0);
1788       op1 = strip_offset (op1, false, &off1);
1789
1790       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1791       if (op0 == TREE_OPERAND (expr, 0)
1792           && op1 == TREE_OPERAND (expr, 1))
1793         return orig_expr;
1794
1795       if (zero_p (op1))
1796         expr = op0;
1797       else if (zero_p (op0))
1798         {
1799           if (code == PLUS_EXPR)
1800             expr = op1;
1801           else
1802             expr = build1 (NEGATE_EXPR, type, op1);
1803         }
1804       else
1805         expr = build2 (code, type, op0, op1);
1806
1807       return fold_convert (orig_type, expr);
1808
1809     case ARRAY_REF:
1810       if (!inside_addr)
1811         return orig_expr;
1812
1813       step = array_ref_element_size (expr);
1814       if (!cst_and_fits_in_hwi (step))
1815         break;
1816
1817       st = int_cst_value (step);
1818       op1 = TREE_OPERAND (expr, 1);
1819       op1 = strip_offset (op1, false, &off1);
1820       *offset = off1 * st;
1821       break;
1822
1823     case COMPONENT_REF:
1824       if (!inside_addr)
1825         return orig_expr;
1826       break;
1827
1828     case ADDR_EXPR:
1829       inside_addr = true;
1830       break;
1831
1832     default:
1833       return orig_expr;
1834     }
1835
1836   /* Default handling of expressions for that we want to recurse into
1837      the first operand.  */
1838   op0 = TREE_OPERAND (expr, 0);
1839   op0 = strip_offset (op0, inside_addr, &off0);
1840   *offset += off0;
1841
1842   if (op0 == TREE_OPERAND (expr, 0)
1843       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1844     return orig_expr;
1845
1846   expr = copy_node (expr);
1847   TREE_OPERAND (expr, 0) = op0;
1848   if (op1)
1849     TREE_OPERAND (expr, 1) = op1;
1850
1851   return fold_convert (orig_type, expr);
1852 }
1853
1854 /* Returns variant of TYPE that can be used as base for different uses.
1855    For integer types, we return unsigned variant of the type, which
1856    avoids problems with overflows.  For pointer types, we return void *.  */
1857
1858 static tree
1859 generic_type_for (tree type)
1860 {
1861   if (POINTER_TYPE_P (type))
1862     return ptr_type_node;
1863
1864   if (TYPE_UNSIGNED (type))
1865     return type;
1866
1867   return unsigned_type_for (type);
1868 }
1869
1870 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1871    position to POS.  If USE is not NULL, the candidate is set as related to
1872    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1873    replacement of the final value of the iv by a direct computation.  */
1874
1875 static struct iv_cand *
1876 add_candidate_1 (struct ivopts_data *data,
1877                  tree base, tree step, bool important, enum iv_position pos,
1878                  struct iv_use *use, tree incremented_at)
1879 {
1880   unsigned i;
1881   struct iv_cand *cand = NULL;
1882   tree type, orig_type;
1883
1884   if (base)
1885     {
1886       orig_type = TREE_TYPE (base);
1887       type = generic_type_for (orig_type);
1888       if (type != orig_type)
1889         {
1890           base = fold_convert (type, base);
1891           if (step)
1892             step = fold_convert (type, step);
1893         }
1894     }
1895
1896   for (i = 0; i < n_iv_cands (data); i++)
1897     {
1898       cand = iv_cand (data, i);
1899
1900       if (cand->pos != pos)
1901         continue;
1902
1903       if (cand->incremented_at != incremented_at)
1904         continue;
1905
1906       if (!cand->iv)
1907         {
1908           if (!base && !step)
1909             break;
1910
1911           continue;
1912         }
1913
1914       if (!base && !step)
1915         continue;
1916
1917       if (!operand_equal_p (base, cand->iv->base, 0))
1918         continue;
1919
1920       if (zero_p (cand->iv->step))
1921         {
1922           if (zero_p (step))
1923             break;
1924         }
1925       else
1926         {
1927           if (step && operand_equal_p (step, cand->iv->step, 0))
1928             break;
1929         }
1930     }
1931
1932   if (i == n_iv_cands (data))
1933     {
1934       cand = xcalloc (1, sizeof (struct iv_cand));
1935       cand->id = i;
1936
1937       if (!base && !step)
1938         cand->iv = NULL;
1939       else
1940         cand->iv = alloc_iv (base, step);
1941
1942       cand->pos = pos;
1943       if (pos != IP_ORIGINAL && cand->iv)
1944         {
1945           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1946           cand->var_after = cand->var_before;
1947         }
1948       cand->important = important;
1949       cand->incremented_at = incremented_at;
1950       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
1951
1952       if (dump_file && (dump_flags & TDF_DETAILS))
1953         dump_cand (dump_file, cand);
1954     }
1955
1956   if (important && !cand->important)
1957     {
1958       cand->important = true;
1959       if (dump_file && (dump_flags & TDF_DETAILS))
1960         fprintf (dump_file, "Candidate %d is important\n", cand->id);
1961     }
1962
1963   if (use)
1964     {
1965       bitmap_set_bit (use->related_cands, i);
1966       if (dump_file && (dump_flags & TDF_DETAILS))
1967         fprintf (dump_file, "Candidate %d is related to use %d\n",
1968                  cand->id, use->id);
1969     }
1970
1971   return cand;
1972 }
1973
1974 /* Returns true if incrementing the induction variable at the end of the LOOP
1975    is allowed.
1976
1977    The purpose is to avoid splitting latch edge with a biv increment, thus
1978    creating a jump, possibly confusing other optimization passes and leaving
1979    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
1980    is not available (so we do not have a better alternative), or if the latch
1981    edge is already nonempty.  */
1982
1983 static bool
1984 allow_ip_end_pos_p (struct loop *loop)
1985 {
1986   if (!ip_normal_pos (loop))
1987     return true;
1988
1989   if (!empty_block_p (ip_end_pos (loop)))
1990     return true;
1991
1992   return false;
1993 }
1994
1995 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1996    position to POS.  If USE is not NULL, the candidate is set as related to
1997    it.  The candidate computation is scheduled on all available positions.  */
1998
1999 static void
2000 add_candidate (struct ivopts_data *data,
2001                tree base, tree step, bool important, struct iv_use *use)
2002 {
2003   if (ip_normal_pos (data->current_loop))
2004     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2005   if (ip_end_pos (data->current_loop)
2006       && allow_ip_end_pos_p (data->current_loop))
2007     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2008 }
2009
2010 /* Add a standard "0 + 1 * iteration" iv candidate for a
2011    type with SIZE bits.  */
2012
2013 static void
2014 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2015                                      unsigned int size)
2016 {
2017   tree type = lang_hooks.types.type_for_size (size, true);
2018   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2019                  true, NULL);
2020 }
2021
2022 /* Adds standard iv candidates.  */
2023
2024 static void
2025 add_standard_iv_candidates (struct ivopts_data *data)
2026 {
2027   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2028
2029   /* The same for a double-integer type if it is still fast enough.  */
2030   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2031     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2032 }
2033
2034
2035 /* Adds candidates bases on the old induction variable IV.  */
2036
2037 static void
2038 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2039 {
2040   tree phi, def;
2041   struct iv_cand *cand;
2042
2043   add_candidate (data, iv->base, iv->step, true, NULL);
2044
2045   /* The same, but with initial value zero.  */
2046   add_candidate (data,
2047                  build_int_cst (TREE_TYPE (iv->base), 0),
2048                  iv->step, true, NULL);
2049
2050   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2051   if (TREE_CODE (phi) == PHI_NODE)
2052     {
2053       /* Additionally record the possibility of leaving the original iv
2054          untouched.  */
2055       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2056       cand = add_candidate_1 (data,
2057                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2058                               SSA_NAME_DEF_STMT (def));
2059       cand->var_before = iv->ssa_name;
2060       cand->var_after = def;
2061     }
2062 }
2063
2064 /* Adds candidates based on the old induction variables.  */
2065
2066 static void
2067 add_old_ivs_candidates (struct ivopts_data *data)
2068 {
2069   unsigned i;
2070   struct iv *iv;
2071   bitmap_iterator bi;
2072
2073   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2074     {
2075       iv = ver_info (data, i)->iv;
2076       if (iv && iv->biv_p && !zero_p (iv->step))
2077         add_old_iv_candidates (data, iv);
2078     }
2079 }
2080
2081 /* Adds candidates based on the value of the induction variable IV and USE.  */
2082
2083 static void
2084 add_iv_value_candidates (struct ivopts_data *data,
2085                          struct iv *iv, struct iv_use *use)
2086 {
2087   add_candidate (data, iv->base, iv->step, false, use);
2088
2089   /* The same, but with initial value zero.  */
2090   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2091                  iv->step, false, use);
2092 }
2093
2094 /* Adds candidates based on the address IV and USE.  */
2095
2096 static void
2097 add_address_candidates (struct ivopts_data *data,
2098                         struct iv *iv, struct iv_use *use)
2099 {
2100   tree base, abase;
2101   unsigned HOST_WIDE_INT offset;
2102
2103   /* First, the trivial choices.  */
2104   add_iv_value_candidates (data, iv, use);
2105
2106   /* Second, try removing the COMPONENT_REFs.  */
2107   if (TREE_CODE (iv->base) == ADDR_EXPR)
2108     {
2109       base = TREE_OPERAND (iv->base, 0);
2110       while (TREE_CODE (base) == COMPONENT_REF
2111              || (TREE_CODE (base) == ARRAY_REF
2112                  && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
2113         base = TREE_OPERAND (base, 0);
2114
2115       if (base != TREE_OPERAND (iv->base, 0))
2116         {
2117           gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
2118           gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
2119
2120           if (TREE_CODE (base) == INDIRECT_REF)
2121             base = TREE_OPERAND (base, 0);
2122           else
2123             base = build_addr (base);
2124           add_candidate (data, base, iv->step, false, use);
2125         }
2126     }
2127
2128   /* Third, try removing the constant offset.  */
2129   abase = iv->base;
2130   base = strip_offset (abase, false, &offset);
2131   if (offset)
2132     add_candidate (data, base, iv->step, false, use);
2133 }
2134
2135 /* Possibly adds pseudocandidate for replacing the final value of USE by
2136    a direct computation.  */
2137
2138 static void
2139 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2140 {
2141   struct tree_niter_desc *niter;
2142
2143   /* We must know where we exit the loop and how many times does it roll.  */
2144   niter = niter_for_single_dom_exit (data);
2145   if (!niter
2146       || !zero_p (niter->may_be_zero))
2147     return;
2148
2149   add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2150 }
2151
2152 /* Adds candidates based on the uses.  */
2153
2154 static void
2155 add_derived_ivs_candidates (struct ivopts_data *data)
2156 {
2157   unsigned i;
2158
2159   for (i = 0; i < n_iv_uses (data); i++)
2160     {
2161       struct iv_use *use = iv_use (data, i);
2162
2163       if (!use)
2164         continue;
2165
2166       switch (use->type)
2167         {
2168         case USE_NONLINEAR_EXPR:
2169         case USE_COMPARE:
2170           /* Just add the ivs based on the value of the iv used here.  */
2171           add_iv_value_candidates (data, use->iv, use);
2172           break;
2173
2174         case USE_OUTER:
2175           add_iv_value_candidates (data, use->iv, use);
2176
2177           /* Additionally, add the pseudocandidate for the possibility to
2178              replace the final value by a direct computation.  */
2179           add_iv_outer_candidates (data, use);
2180           break;
2181
2182         case USE_ADDRESS:
2183           add_address_candidates (data, use->iv, use);
2184           break;
2185
2186         default:
2187           gcc_unreachable ();
2188         }
2189     }
2190 }
2191
2192 /* Record important candidates and add them to related_cands bitmaps
2193    if needed.  */
2194
2195 static void
2196 record_important_candidates (struct ivopts_data *data)
2197 {
2198   unsigned i;
2199   struct iv_use *use;
2200
2201   for (i = 0; i < n_iv_cands (data); i++)
2202     {
2203       struct iv_cand *cand = iv_cand (data, i);
2204
2205       if (cand->important)
2206         bitmap_set_bit (data->important_candidates, i);
2207     }
2208
2209   data->consider_all_candidates = (n_iv_cands (data)
2210                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2211
2212   if (data->consider_all_candidates)
2213     {
2214       /* We will not need "related_cands" bitmaps in this case,
2215          so release them to decrease peak memory consumption.  */
2216       for (i = 0; i < n_iv_uses (data); i++)
2217         {
2218           use = iv_use (data, i);
2219           BITMAP_FREE (use->related_cands);
2220         }
2221     }
2222   else
2223     {
2224       /* Add important candidates to the related_cands bitmaps.  */
2225       for (i = 0; i < n_iv_uses (data); i++)
2226         bitmap_ior_into (iv_use (data, i)->related_cands,
2227                          data->important_candidates);
2228     }
2229 }
2230
2231 /* Finds the candidates for the induction variables.  */
2232
2233 static void
2234 find_iv_candidates (struct ivopts_data *data)
2235 {
2236   /* Add commonly used ivs.  */
2237   add_standard_iv_candidates (data);
2238
2239   /* Add old induction variables.  */
2240   add_old_ivs_candidates (data);
2241
2242   /* Add induction variables derived from uses.  */
2243   add_derived_ivs_candidates (data);
2244
2245   /* Record the important candidates.  */
2246   record_important_candidates (data);
2247 }
2248
2249 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2250    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2251    we allocate a simple list to every use.  */
2252
2253 static void
2254 alloc_use_cost_map (struct ivopts_data *data)
2255 {
2256   unsigned i, size, s, j;
2257
2258   for (i = 0; i < n_iv_uses (data); i++)
2259     {
2260       struct iv_use *use = iv_use (data, i);
2261       bitmap_iterator bi;
2262
2263       if (data->consider_all_candidates)
2264         size = n_iv_cands (data);
2265       else
2266         {
2267           s = 0;
2268           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2269             {
2270               s++;
2271             }
2272
2273           /* Round up to the power of two, so that moduling by it is fast.  */
2274           for (size = 1; size < s; size <<= 1)
2275             continue;
2276         }
2277
2278       use->n_map_members = size;
2279       use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2280     }
2281 }
2282
2283 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2284    on invariants DEPENDS_ON and that the value used in expressing it
2285    is VALUE.*/
2286
2287 static void
2288 set_use_iv_cost (struct ivopts_data *data,
2289                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2290                  bitmap depends_on, tree value)
2291 {
2292   unsigned i, s;
2293
2294   if (cost == INFTY)
2295     {
2296       BITMAP_FREE (depends_on);
2297       return;
2298     }
2299
2300   if (data->consider_all_candidates)
2301     {
2302       use->cost_map[cand->id].cand = cand;
2303       use->cost_map[cand->id].cost = cost;
2304       use->cost_map[cand->id].depends_on = depends_on;
2305       use->cost_map[cand->id].value = value;
2306       return;
2307     }
2308
2309   /* n_map_members is a power of two, so this computes modulo.  */
2310   s = cand->id & (use->n_map_members - 1);
2311   for (i = s; i < use->n_map_members; i++)
2312     if (!use->cost_map[i].cand)
2313       goto found;
2314   for (i = 0; i < s; i++)
2315     if (!use->cost_map[i].cand)
2316       goto found;
2317
2318   gcc_unreachable ();
2319
2320 found:
2321   use->cost_map[i].cand = cand;
2322   use->cost_map[i].cost = cost;
2323   use->cost_map[i].depends_on = depends_on;
2324   use->cost_map[i].value = value;
2325 }
2326
2327 /* Gets cost of (USE, CANDIDATE) pair.  */
2328
2329 static struct cost_pair *
2330 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2331                  struct iv_cand *cand)
2332 {
2333   unsigned i, s;
2334   struct cost_pair *ret;
2335
2336   if (!cand)
2337     return NULL;
2338
2339   if (data->consider_all_candidates)
2340     {
2341       ret = use->cost_map + cand->id;
2342       if (!ret->cand)
2343         return NULL;
2344
2345       return ret;
2346     }
2347
2348   /* n_map_members is a power of two, so this computes modulo.  */
2349   s = cand->id & (use->n_map_members - 1);
2350   for (i = s; i < use->n_map_members; i++)
2351     if (use->cost_map[i].cand == cand)
2352       return use->cost_map + i;
2353
2354   for (i = 0; i < s; i++)
2355     if (use->cost_map[i].cand == cand)
2356       return use->cost_map + i;
2357
2358   return NULL;
2359 }
2360
2361 /* Returns estimate on cost of computing SEQ.  */
2362
2363 static unsigned
2364 seq_cost (rtx seq)
2365 {
2366   unsigned cost = 0;
2367   rtx set;
2368
2369   for (; seq; seq = NEXT_INSN (seq))
2370     {
2371       set = single_set (seq);
2372       if (set)
2373         cost += rtx_cost (set, SET);
2374       else
2375         cost++;
2376     }
2377
2378   return cost;
2379 }
2380
2381 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2382 static rtx
2383 produce_memory_decl_rtl (tree obj, int *regno)
2384 {
2385   rtx x;
2386
2387   gcc_assert (obj);
2388   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2389     {
2390       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2391       x = gen_rtx_SYMBOL_REF (Pmode, name);
2392     }
2393   else
2394     x = gen_raw_REG (Pmode, (*regno)++);
2395
2396   return gen_rtx_MEM (DECL_MODE (obj), x);
2397 }
2398
2399 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2400    walk_tree.  DATA contains the actual fake register number.  */
2401
2402 static tree
2403 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2404 {
2405   tree obj = NULL_TREE;
2406   rtx x = NULL_RTX;
2407   int *regno = data;
2408
2409   switch (TREE_CODE (*expr_p))
2410     {
2411     case ADDR_EXPR:
2412       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2413            handled_component_p (*expr_p);
2414            expr_p = &TREE_OPERAND (*expr_p, 0))
2415         continue;
2416       obj = *expr_p;
2417       if (DECL_P (obj))
2418         x = produce_memory_decl_rtl (obj, regno);
2419       break;
2420
2421     case SSA_NAME:
2422       *ws = 0;
2423       obj = SSA_NAME_VAR (*expr_p);
2424       if (!DECL_RTL_SET_P (obj))
2425         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2426       break;
2427
2428     case VAR_DECL:
2429     case PARM_DECL:
2430     case RESULT_DECL:
2431       *ws = 0;
2432       obj = *expr_p;
2433
2434       if (DECL_RTL_SET_P (obj))
2435         break;
2436
2437       if (DECL_MODE (obj) == BLKmode)
2438         x = produce_memory_decl_rtl (obj, regno);
2439       else
2440         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2441
2442       break;
2443
2444     default:
2445       break;
2446     }
2447
2448   if (x)
2449     {
2450       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2451       SET_DECL_RTL (obj, x);
2452     }
2453
2454   return NULL_TREE;
2455 }
2456
2457 /* Determines cost of the computation of EXPR.  */
2458
2459 static unsigned
2460 computation_cost (tree expr)
2461 {
2462   rtx seq, rslt;
2463   tree type = TREE_TYPE (expr);
2464   unsigned cost;
2465   /* Avoid using hard regs in ways which may be unsupported.  */
2466   int regno = LAST_VIRTUAL_REGISTER + 1;
2467
2468   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2469   start_sequence ();
2470   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2471   seq = get_insns ();
2472   end_sequence ();
2473
2474   cost = seq_cost (seq);
2475   if (GET_CODE (rslt) == MEM)
2476     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2477
2478   return cost;
2479 }
2480
2481 /* Returns variable containing the value of candidate CAND at statement AT.  */
2482
2483 static tree
2484 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2485 {
2486   if (stmt_after_increment (loop, cand, stmt))
2487     return cand->var_after;
2488   else
2489     return cand->var_before;
2490 }
2491
2492 /* Determines the expression by that USE is expressed from induction variable
2493    CAND at statement AT in LOOP.  */
2494
2495 static tree
2496 get_computation_at (struct loop *loop,
2497                     struct iv_use *use, struct iv_cand *cand, tree at)
2498 {
2499   tree ubase = use->iv->base;
2500   tree ustep = use->iv->step;
2501   tree cbase = cand->iv->base;
2502   tree cstep = cand->iv->step;
2503   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2504   tree uutype;
2505   tree expr, delta;
2506   tree ratio;
2507   unsigned HOST_WIDE_INT ustepi, cstepi;
2508   HOST_WIDE_INT ratioi;
2509
2510   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2511     {
2512       /* We do not have a precision to express the values of use.  */
2513       return NULL_TREE;
2514     }
2515
2516   expr = var_at_stmt (loop, cand, at);
2517
2518   if (TREE_TYPE (expr) != ctype)
2519     {
2520       /* This may happen with the original ivs.  */
2521       expr = fold_convert (ctype, expr);
2522     }
2523
2524   if (TYPE_UNSIGNED (utype))
2525     uutype = utype;
2526   else
2527     {
2528       uutype = unsigned_type_for (utype);
2529       ubase = fold_convert (uutype, ubase);
2530       ustep = fold_convert (uutype, ustep);
2531     }
2532
2533   if (uutype != ctype)
2534     {
2535       expr = fold_convert (uutype, expr);
2536       cbase = fold_convert (uutype, cbase);
2537       cstep = fold_convert (uutype, cstep);
2538     }
2539
2540   if (!cst_and_fits_in_hwi (cstep)
2541       || !cst_and_fits_in_hwi (ustep))
2542     return NULL_TREE;
2543
2544   ustepi = int_cst_value (ustep);
2545   cstepi = int_cst_value (cstep);
2546
2547   if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2548     {
2549       /* TODO maybe consider case when ustep divides cstep and the ratio is
2550          a power of 2 (so that the division is fast to execute)?  We would
2551          need to be much more careful with overflows etc. then.  */
2552       return NULL_TREE;
2553     }
2554
2555   /* We may need to shift the value if we are after the increment.  */
2556   if (stmt_after_increment (loop, cand, at))
2557     cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2558
2559   /* use = ubase - ratio * cbase + ratio * var.
2560
2561      In general case ubase + ratio * (var - cbase) could be better (one less
2562      multiplication), but often it is possible to eliminate redundant parts
2563      of computations from (ubase - ratio * cbase) term, and if it does not
2564      happen, fold is able to apply the distributive law to obtain this form
2565      anyway.  */
2566
2567   if (ratioi == 1)
2568     {
2569       delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2570       expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2571     }
2572   else if (ratioi == -1)
2573     {
2574       delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2575       expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2576     }
2577   else
2578     {
2579       ratio = build_int_cst_type (uutype, ratioi);
2580       delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2581       delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2582       expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2583       expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2584     }
2585
2586   return fold_convert (utype, expr);
2587 }
2588
2589 /* Determines the expression by that USE is expressed from induction variable
2590    CAND in LOOP.  */
2591
2592 static tree
2593 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2594 {
2595   return get_computation_at (loop, use, cand, use->stmt);
2596 }
2597
2598 /* Returns cost of addition in MODE.  */
2599
2600 static unsigned
2601 add_cost (enum machine_mode mode)
2602 {
2603   static unsigned costs[NUM_MACHINE_MODES];
2604   rtx seq;
2605   unsigned cost;
2606
2607   if (costs[mode])
2608     return costs[mode];
2609
2610   start_sequence ();
2611   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2612                                  gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2613                                  gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2614                  NULL_RTX);
2615   seq = get_insns ();
2616   end_sequence ();
2617
2618   cost = seq_cost (seq);
2619   if (!cost)
2620     cost = 1;
2621
2622   costs[mode] = cost;
2623
2624   if (dump_file && (dump_flags & TDF_DETAILS))
2625     fprintf (dump_file, "Addition in %s costs %d\n",
2626              GET_MODE_NAME (mode), cost);
2627   return cost;
2628 }
2629
2630 /* Entry in a hashtable of already known costs for multiplication.  */
2631 struct mbc_entry
2632 {
2633   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2634   enum machine_mode mode;       /* In mode.  */
2635   unsigned cost;                /* The cost.  */
2636 };
2637
2638 /* Counts hash value for the ENTRY.  */
2639
2640 static hashval_t
2641 mbc_entry_hash (const void *entry)
2642 {
2643   const struct mbc_entry *e = entry;
2644
2645   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2646 }
2647
2648 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2649
2650 static int
2651 mbc_entry_eq (const void *entry1, const void *entry2)
2652 {
2653   const struct mbc_entry *e1 = entry1;
2654   const struct mbc_entry *e2 = entry2;
2655
2656   return (e1->mode == e2->mode
2657           && e1->cst == e2->cst);
2658 }
2659
2660 /* Returns cost of multiplication by constant CST in MODE.  */
2661
2662 static unsigned
2663 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2664 {
2665   static htab_t costs;
2666   struct mbc_entry **cached, act;
2667   rtx seq;
2668   unsigned cost;
2669
2670   if (!costs)
2671     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2672
2673   act.mode = mode;
2674   act.cst = cst;
2675   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2676   if (*cached)
2677     return (*cached)->cost;
2678
2679   *cached = xmalloc (sizeof (struct mbc_entry));
2680   (*cached)->mode = mode;
2681   (*cached)->cst = cst;
2682
2683   start_sequence ();
2684   expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2685                NULL_RTX, 0);
2686   seq = get_insns ();
2687   end_sequence ();
2688
2689   cost = seq_cost (seq);
2690
2691   if (dump_file && (dump_flags & TDF_DETAILS))
2692     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2693              (int) cst, GET_MODE_NAME (mode), cost);
2694
2695   (*cached)->cost = cost;
2696
2697   return cost;
2698 }
2699
2700 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2701    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2702    variable is omitted.  The created memory accesses MODE.
2703
2704    TODO -- there must be some better way.  This all is quite crude.  */
2705
2706 static unsigned
2707 get_address_cost (bool symbol_present, bool var_present,
2708                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2709 {
2710 #define MAX_RATIO 128
2711   static sbitmap valid_mult;
2712   static HOST_WIDE_INT rat, off;
2713   static HOST_WIDE_INT min_offset, max_offset;
2714   static unsigned costs[2][2][2][2];
2715   unsigned cost, acost;
2716   rtx seq, addr, base;
2717   bool offset_p, ratio_p;
2718   rtx reg1;
2719   HOST_WIDE_INT s_offset;
2720   unsigned HOST_WIDE_INT mask;
2721   unsigned bits;
2722
2723   if (!valid_mult)
2724     {
2725       HOST_WIDE_INT i;
2726
2727       reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2728
2729       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2730       for (i = 1; i <= 1 << 20; i <<= 1)
2731         {
2732           XEXP (addr, 1) = GEN_INT (i);
2733           if (!memory_address_p (Pmode, addr))
2734             break;
2735         }
2736       max_offset = i >> 1;
2737       off = max_offset;
2738
2739       for (i = 1; i <= 1 << 20; i <<= 1)
2740         {
2741           XEXP (addr, 1) = GEN_INT (-i);
2742           if (!memory_address_p (Pmode, addr))
2743             break;
2744         }
2745       min_offset = -(i >> 1);
2746
2747       if (dump_file && (dump_flags & TDF_DETAILS))
2748         {
2749           fprintf (dump_file, "get_address_cost:\n");
2750           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
2751           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
2752         }
2753
2754       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2755       sbitmap_zero (valid_mult);
2756       rat = 1;
2757       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2758       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2759         {
2760           XEXP (addr, 1) = GEN_INT (i);
2761           if (memory_address_p (Pmode, addr))
2762             {
2763               SET_BIT (valid_mult, i + MAX_RATIO);
2764               rat = i;
2765             }
2766         }
2767
2768       if (dump_file && (dump_flags & TDF_DETAILS))
2769         {
2770           fprintf (dump_file, "  allowed multipliers:");
2771           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2772             if (TEST_BIT (valid_mult, i + MAX_RATIO))
2773               fprintf (dump_file, " %d", (int) i);
2774           fprintf (dump_file, "\n");
2775           fprintf (dump_file, "\n");
2776         }
2777     }
2778
2779   bits = GET_MODE_BITSIZE (Pmode);
2780   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2781   offset &= mask;
2782   if ((offset >> (bits - 1) & 1))
2783     offset |= ~mask;
2784   s_offset = offset;
2785
2786   cost = 0;
2787   offset_p = (s_offset != 0
2788               && min_offset <= s_offset && s_offset <= max_offset);
2789   ratio_p = (ratio != 1
2790              && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2791              && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2792
2793   if (ratio != 1 && !ratio_p)
2794     cost += multiply_by_cost (ratio, Pmode);
2795
2796   if (s_offset && !offset_p && !symbol_present)
2797     {
2798       cost += add_cost (Pmode);
2799       var_present = true;
2800     }
2801
2802   acost = costs[symbol_present][var_present][offset_p][ratio_p];
2803   if (!acost)
2804     {
2805       acost = 0;
2806
2807       addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2808       reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2809       if (ratio_p)
2810         addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2811
2812       if (var_present)
2813         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2814
2815       if (symbol_present)
2816         {
2817           base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2818           if (offset_p)
2819             base = gen_rtx_fmt_e (CONST, Pmode,
2820                                   gen_rtx_fmt_ee (PLUS, Pmode,
2821                                                   base,
2822                                                   GEN_INT (off)));
2823         }
2824       else if (offset_p)
2825         base = GEN_INT (off);
2826       else
2827         base = NULL_RTX;
2828
2829       if (base)
2830         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2831
2832       start_sequence ();
2833       addr = memory_address (Pmode, addr);
2834       seq = get_insns ();
2835       end_sequence ();
2836
2837       acost = seq_cost (seq);
2838       acost += address_cost (addr, Pmode);
2839
2840       if (!acost)
2841         acost = 1;
2842       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2843     }
2844
2845   return cost + acost;
2846 }
2847
2848 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2849    the bitmap to that we should store it.  */
2850
2851 static struct ivopts_data *fd_ivopts_data;
2852 static tree
2853 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2854 {
2855   bitmap *depends_on = data;
2856   struct version_info *info;
2857
2858   if (TREE_CODE (*expr_p) != SSA_NAME)
2859     return NULL_TREE;
2860   info = name_info (fd_ivopts_data, *expr_p);
2861
2862   if (!info->inv_id || info->has_nonlin_use)
2863     return NULL_TREE;
2864
2865   if (!*depends_on)
2866     *depends_on = BITMAP_ALLOC (NULL);
2867   bitmap_set_bit (*depends_on, info->inv_id);
2868
2869   return NULL_TREE;
2870 }
2871
2872 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
2873    invariants the computation depends on.  */
2874
2875 static unsigned
2876 force_var_cost (struct ivopts_data *data,
2877                 tree expr, bitmap *depends_on)
2878 {
2879   static bool costs_initialized = false;
2880   static unsigned integer_cost;
2881   static unsigned symbol_cost;
2882   static unsigned address_cost;
2883   tree op0, op1;
2884   unsigned cost0, cost1, cost;
2885   enum machine_mode mode;
2886
2887   if (!costs_initialized)
2888     {
2889       tree var = create_tmp_var_raw (integer_type_node, "test_var");
2890       rtx x = gen_rtx_MEM (DECL_MODE (var),
2891                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2892       tree addr;
2893       tree type = build_pointer_type (integer_type_node);
2894
2895       integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2896                                                            2000));
2897
2898       SET_DECL_RTL (var, x);
2899       TREE_STATIC (var) = 1;
2900       addr = build1 (ADDR_EXPR, type, var);
2901       symbol_cost = computation_cost (addr) + 1;
2902
2903       address_cost
2904         = computation_cost (build2 (PLUS_EXPR, type,
2905                                     addr,
2906                                     build_int_cst_type (type, 2000))) + 1;
2907       if (dump_file && (dump_flags & TDF_DETAILS))
2908         {
2909           fprintf (dump_file, "force_var_cost:\n");
2910           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
2911           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
2912           fprintf (dump_file, "  address %d\n", (int) address_cost);
2913           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
2914           fprintf (dump_file, "\n");
2915         }
2916
2917       costs_initialized = true;
2918     }
2919
2920   STRIP_NOPS (expr);
2921
2922   if (depends_on)
2923     {
2924       fd_ivopts_data = data;
2925       walk_tree (&expr, find_depends, depends_on, NULL);
2926     }
2927
2928   if (SSA_VAR_P (expr))
2929     return 0;
2930
2931   if (TREE_INVARIANT (expr))
2932     {
2933       if (TREE_CODE (expr) == INTEGER_CST)
2934         return integer_cost;
2935
2936       if (TREE_CODE (expr) == ADDR_EXPR)
2937         {
2938           tree obj = TREE_OPERAND (expr, 0);
2939
2940           if (TREE_CODE (obj) == VAR_DECL
2941               || TREE_CODE (obj) == PARM_DECL
2942               || TREE_CODE (obj) == RESULT_DECL)
2943             return symbol_cost;
2944         }
2945
2946       return address_cost;
2947     }
2948
2949   switch (TREE_CODE (expr))
2950     {
2951     case PLUS_EXPR:
2952     case MINUS_EXPR:
2953     case MULT_EXPR:
2954       op0 = TREE_OPERAND (expr, 0);
2955       op1 = TREE_OPERAND (expr, 1);
2956       STRIP_NOPS (op0);
2957       STRIP_NOPS (op1);
2958
2959       if (is_gimple_val (op0))
2960         cost0 = 0;
2961       else
2962         cost0 = force_var_cost (data, op0, NULL);
2963
2964       if (is_gimple_val (op1))
2965         cost1 = 0;
2966       else
2967         cost1 = force_var_cost (data, op1, NULL);
2968
2969       break;
2970
2971     default:
2972       /* Just an arbitrary value, FIXME.  */
2973       return target_spill_cost;
2974     }
2975
2976   mode = TYPE_MODE (TREE_TYPE (expr));
2977   switch (TREE_CODE (expr))
2978     {
2979     case PLUS_EXPR:
2980     case MINUS_EXPR:
2981       cost = add_cost (mode);
2982       break;
2983
2984     case MULT_EXPR:
2985       if (cst_and_fits_in_hwi (op0))
2986         cost = multiply_by_cost (int_cst_value (op0), mode);
2987       else if (cst_and_fits_in_hwi (op1))
2988         cost = multiply_by_cost (int_cst_value (op1), mode);
2989       else
2990         return target_spill_cost;
2991       break;
2992
2993     default:
2994       gcc_unreachable ();
2995     }
2996
2997   cost += cost0;
2998   cost += cost1;
2999
3000   /* Bound the cost by target_spill_cost.  The parts of complicated
3001      computations often are either loop invariant or at least can
3002      be shared between several iv uses, so letting this grow without
3003      limits would not give reasonable results.  */
3004   return cost < target_spill_cost ? cost : target_spill_cost;
3005 }
3006
3007 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3008    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3009    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3010    invariants the computation depends on.  */
3011
3012 static unsigned
3013 split_address_cost (struct ivopts_data *data,
3014                     tree addr, bool *symbol_present, bool *var_present,
3015                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3016 {
3017   tree core;
3018   HOST_WIDE_INT bitsize;
3019   HOST_WIDE_INT bitpos;
3020   tree toffset;
3021   enum machine_mode mode;
3022   int unsignedp, volatilep;
3023
3024   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3025                               &unsignedp, &volatilep, false);
3026
3027   if (toffset != 0
3028       || bitpos % BITS_PER_UNIT != 0
3029       || TREE_CODE (core) != VAR_DECL)
3030     {
3031       *symbol_present = false;
3032       *var_present = true;
3033       fd_ivopts_data = data;
3034       walk_tree (&addr, find_depends, depends_on, NULL);
3035       return target_spill_cost;
3036     }
3037
3038   *offset += bitpos / BITS_PER_UNIT;
3039   if (TREE_STATIC (core)
3040       || DECL_EXTERNAL (core))
3041     {
3042       *symbol_present = true;
3043       *var_present = false;
3044       return 0;
3045     }
3046
3047   *symbol_present = false;
3048   *var_present = true;
3049   return 0;
3050 }
3051
3052 /* Estimates cost of expressing difference of addresses E1 - E2 as
3053    var + symbol + offset.  The value of offset is added to OFFSET,
3054    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3055    part is missing.  DEPENDS_ON is a set of the invariants the computation
3056    depends on.  */
3057
3058 static unsigned
3059 ptr_difference_cost (struct ivopts_data *data,
3060                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3061                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3062 {
3063   HOST_WIDE_INT diff = 0;
3064   unsigned cost;
3065
3066   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3067
3068   if (ptr_difference_const (e1, e2, &diff))
3069     {
3070       *offset += diff;
3071       *symbol_present = false;
3072       *var_present = false;
3073       return 0;
3074     }
3075
3076   if (e2 == integer_zero_node)
3077     return split_address_cost (data, TREE_OPERAND (e1, 0),
3078                                symbol_present, var_present, offset, depends_on);
3079
3080   *symbol_present = false;
3081   *var_present = true;
3082
3083   cost = force_var_cost (data, e1, depends_on);
3084   cost += force_var_cost (data, e2, depends_on);
3085   cost += add_cost (Pmode);
3086
3087   return cost;
3088 }
3089
3090 /* Estimates cost of expressing difference E1 - E2 as
3091    var + symbol + offset.  The value of offset is added to OFFSET,
3092    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3093    part is missing.  DEPENDS_ON is a set of the invariants the computation
3094    depends on.  */
3095
3096 static unsigned
3097 difference_cost (struct ivopts_data *data,
3098                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3099                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3100 {
3101   unsigned cost;
3102   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3103   unsigned HOST_WIDE_INT off1, off2;
3104
3105   e1 = strip_offset (e1, false, &off1);
3106   e2 = strip_offset (e2, false, &off2);
3107   *offset += off1 - off2;
3108
3109   STRIP_NOPS (e1);
3110   STRIP_NOPS (e2);
3111
3112   if (TREE_CODE (e1) == ADDR_EXPR)
3113     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3114                                 depends_on);
3115   *symbol_present = false;
3116
3117   if (operand_equal_p (e1, e2, 0))
3118     {
3119       *var_present = false;
3120       return 0;
3121     }
3122   *var_present = true;
3123   if (zero_p (e2))
3124     return force_var_cost (data, e1, depends_on);
3125
3126   if (zero_p (e1))
3127     {
3128       cost = force_var_cost (data, e2, depends_on);
3129       cost += multiply_by_cost (-1, mode);
3130
3131       return cost;
3132     }
3133
3134   cost = force_var_cost (data, e1, depends_on);
3135   cost += force_var_cost (data, e2, depends_on);
3136   cost += add_cost (mode);
3137
3138   return cost;
3139 }
3140
3141 /* Determines the cost of the computation by that USE is expressed
3142    from induction variable CAND.  If ADDRESS_P is true, we just need
3143    to create an address from it, otherwise we want to get it into
3144    register.  A set of invariants we depend on is stored in
3145    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3146
3147 static unsigned
3148 get_computation_cost_at (struct ivopts_data *data,
3149                          struct iv_use *use, struct iv_cand *cand,
3150                          bool address_p, bitmap *depends_on, tree at)
3151 {
3152   tree ubase = use->iv->base, ustep = use->iv->step;
3153   tree cbase, cstep;
3154   tree utype = TREE_TYPE (ubase), ctype;
3155   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3156   HOST_WIDE_INT ratio, aratio;
3157   bool var_present, symbol_present;
3158   unsigned cost = 0, n_sums;
3159
3160   *depends_on = NULL;
3161
3162   /* Only consider real candidates.  */
3163   if (!cand->iv)
3164     return INFTY;
3165
3166   cbase = cand->iv->base;
3167   cstep = cand->iv->step;
3168   ctype = TREE_TYPE (cbase);
3169
3170   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3171     {
3172       /* We do not have a precision to express the values of use.  */
3173       return INFTY;
3174     }
3175
3176   if (address_p)
3177     {
3178       /* Do not try to express address of an object with computation based
3179          on address of a different object.  This may cause problems in rtl
3180          level alias analysis (that does not expect this to be happening,
3181          as this is illegal in C), and would be unlikely to be useful
3182          anyway.  */
3183       if (use->iv->base_object
3184           && cand->iv->base_object
3185           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3186         return INFTY;
3187     }
3188
3189   if (!cst_and_fits_in_hwi (ustep)
3190       || !cst_and_fits_in_hwi (cstep))
3191     return INFTY;
3192
3193   if (TREE_CODE (ubase) == INTEGER_CST
3194       && !cst_and_fits_in_hwi (ubase))
3195     goto fallback;
3196
3197   if (TREE_CODE (cbase) == INTEGER_CST
3198       && !cst_and_fits_in_hwi (cbase))
3199     goto fallback;
3200
3201   ustepi = int_cst_value (ustep);
3202   cstepi = int_cst_value (cstep);
3203
3204   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3205     {
3206       /* TODO -- add direct handling of this case.  */
3207       goto fallback;
3208     }
3209
3210   if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3211     return INFTY;
3212
3213   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3214      or ratio == 1, it is better to handle this like
3215
3216      ubase - ratio * cbase + ratio * var
3217
3218      (also holds in the case ratio == -1, TODO.  */
3219
3220   if (TREE_CODE (cbase) == INTEGER_CST)
3221     {
3222       offset = - ratio * int_cst_value (cbase);
3223       cost += difference_cost (data,
3224                                ubase, integer_zero_node,
3225                                &symbol_present, &var_present, &offset,
3226                                depends_on);
3227     }
3228   else if (ratio == 1)
3229     {
3230       cost += difference_cost (data,
3231                                ubase, cbase,
3232                                &symbol_present, &var_present, &offset,
3233                                depends_on);
3234     }
3235   else
3236     {
3237       cost += force_var_cost (data, cbase, depends_on);
3238       cost += add_cost (TYPE_MODE (ctype));
3239       cost += difference_cost (data,
3240                                ubase, integer_zero_node,
3241                                &symbol_present, &var_present, &offset,
3242                                depends_on);
3243     }
3244
3245   /* If we are after the increment, the value of the candidate is higher by
3246      one iteration.  */
3247   if (stmt_after_increment (data->current_loop, cand, at))
3248     offset -= ratio * cstepi;
3249
3250   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3251      (symbol/var/const parts may be omitted).  If we are looking for an address,
3252      find the cost of addressing this.  */
3253   if (address_p)
3254     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3255
3256   /* Otherwise estimate the costs for computing the expression.  */
3257   aratio = ratio > 0 ? ratio : -ratio;
3258   if (!symbol_present && !var_present && !offset)
3259     {
3260       if (ratio != 1)
3261         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3262
3263       return cost;
3264     }
3265
3266   if (aratio != 1)
3267     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3268
3269   n_sums = 1;
3270   if (var_present
3271       /* Symbol + offset should be compile-time computable.  */
3272       && (symbol_present || offset))
3273     n_sums++;
3274
3275   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3276
3277 fallback:
3278   {
3279     /* Just get the expression, expand it and measure the cost.  */
3280     tree comp = get_computation_at (data->current_loop, use, cand, at);
3281
3282     if (!comp)
3283       return INFTY;
3284
3285     if (address_p)
3286       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3287
3288     return computation_cost (comp);
3289   }
3290 }
3291
3292 /* Determines the cost of the computation by that USE is expressed
3293    from induction variable CAND.  If ADDRESS_P is true, we just need
3294    to create an address from it, otherwise we want to get it into
3295    register.  A set of invariants we depend on is stored in
3296    DEPENDS_ON.  */
3297
3298 static unsigned
3299 get_computation_cost (struct ivopts_data *data,
3300                       struct iv_use *use, struct iv_cand *cand,
3301                       bool address_p, bitmap *depends_on)
3302 {
3303   return get_computation_cost_at (data,
3304                                   use, cand, address_p, depends_on, use->stmt);
3305 }
3306
3307 /* Determines cost of basing replacement of USE on CAND in a generic
3308    expression.  */
3309
3310 static bool
3311 determine_use_iv_cost_generic (struct ivopts_data *data,
3312                                struct iv_use *use, struct iv_cand *cand)
3313 {
3314   bitmap depends_on;
3315   unsigned cost;
3316
3317   /* The simple case first -- if we need to express value of the preserved
3318      original biv, the cost is 0.  This also prevents us from counting the
3319      cost of increment twice -- once at this use and once in the cost of
3320      the candidate.  */
3321   if (cand->pos == IP_ORIGINAL
3322       && cand->incremented_at == use->stmt)
3323     {
3324       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3325       return true;
3326     }
3327
3328   cost = get_computation_cost (data, use, cand, false, &depends_on);
3329   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3330
3331   return cost != INFTY;
3332 }
3333
3334 /* Determines cost of basing replacement of USE on CAND in an address.  */
3335
3336 static bool
3337 determine_use_iv_cost_address (struct ivopts_data *data,
3338                                struct iv_use *use, struct iv_cand *cand)
3339 {
3340   bitmap depends_on;
3341   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3342
3343   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3344
3345   return cost != INFTY;
3346 }
3347
3348 /* Computes value of induction variable IV in iteration NITER.  */
3349
3350 static tree
3351 iv_value (struct iv *iv, tree niter)
3352 {
3353   tree val;
3354   tree type = TREE_TYPE (iv->base);
3355
3356   niter = fold_convert (type, niter);
3357   val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3358
3359   return fold (build2 (PLUS_EXPR, type, iv->base, val));
3360 }
3361
3362 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3363
3364 static tree
3365 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3366 {
3367   tree val = iv_value (cand->iv, niter);
3368   tree type = TREE_TYPE (cand->iv->base);
3369
3370   if (stmt_after_increment (loop, cand, at))
3371     val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3372
3373   return val;
3374 }
3375
3376 /* Returns period of induction variable iv.  */
3377
3378 static tree
3379 iv_period (struct iv *iv)
3380 {
3381   tree step = iv->step, period, type;
3382   tree pow2div;
3383
3384   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3385
3386   /* Period of the iv is gcd (step, type range).  Since type range is power
3387      of two, it suffices to determine the maximum power of two that divides
3388      step.  */
3389   pow2div = num_ending_zeros (step);
3390   type = unsigned_type_for (TREE_TYPE (step));
3391
3392   period = build_low_bits_mask (type,
3393                                 (TYPE_PRECISION (type)
3394                                  - tree_low_cst (pow2div, 1)));
3395
3396   return period;
3397 }
3398
3399 /* Returns the comparison operator used when eliminating the iv USE.  */
3400
3401 static enum tree_code
3402 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3403 {
3404   struct loop *loop = data->current_loop;
3405   basic_block ex_bb;
3406   edge exit;
3407
3408   ex_bb = bb_for_stmt (use->stmt);
3409   exit = EDGE_SUCC (ex_bb, 0);
3410   if (flow_bb_inside_loop_p (loop, exit->dest))
3411     exit = EDGE_SUCC (ex_bb, 1);
3412
3413   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3414 }
3415
3416 /* Check whether it is possible to express the condition in USE by comparison
3417    of candidate CAND.  If so, store the value compared with to BOUND.  */
3418
3419 static bool
3420 may_eliminate_iv (struct ivopts_data *data,
3421                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3422 {
3423   basic_block ex_bb;
3424   edge exit;
3425   struct tree_niter_desc *niter;
3426   tree nit, nit_type;
3427   tree wider_type, period, per_type;
3428   struct loop *loop = data->current_loop;
3429
3430   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3431      for other conditions inside loop body.  */
3432   ex_bb = bb_for_stmt (use->stmt);
3433   if (use->stmt != last_stmt (ex_bb)
3434       || TREE_CODE (use->stmt) != COND_EXPR)
3435     return false;
3436   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3437     return false;
3438
3439   exit = EDGE_SUCC (ex_bb, 0);
3440   if (flow_bb_inside_loop_p (loop, exit->dest))
3441     exit = EDGE_SUCC (ex_bb, 1);
3442   if (flow_bb_inside_loop_p (loop, exit->dest))
3443     return false;
3444
3445   niter = niter_for_exit (data, exit);
3446   if (!niter
3447       || !zero_p (niter->may_be_zero))
3448     return false;
3449
3450   nit = niter->niter;
3451   nit_type = TREE_TYPE (nit);
3452
3453   /* Determine whether we may use the variable to test whether niter iterations
3454      elapsed.  This is the case iff the period of the induction variable is
3455      greater than the number of iterations.  */
3456   period = iv_period (cand->iv);
3457   if (!period)
3458     return false;
3459   per_type = TREE_TYPE (period);
3460
3461   wider_type = TREE_TYPE (period);
3462   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3463     wider_type = per_type;
3464   else
3465     wider_type = nit_type;
3466
3467   if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3468                                        fold_convert (wider_type, period),
3469                                        fold_convert (wider_type, nit)))))
3470     return false;
3471
3472   *bound = cand_value_at (loop, cand, use->stmt, nit);
3473   return true;
3474 }
3475
3476 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3477
3478 static bool
3479 determine_use_iv_cost_condition (struct ivopts_data *data,
3480                                  struct iv_use *use, struct iv_cand *cand)
3481 {
3482   tree bound = NULL_TREE, op, cond;
3483   bitmap depends_on = NULL;
3484   unsigned cost;
3485
3486   /* Only consider real candidates.  */
3487   if (!cand->iv)
3488     {
3489       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3490       return false;
3491     }
3492
3493   if (may_eliminate_iv (data, use, cand, &bound))
3494     {
3495       cost = force_var_cost (data, bound, &depends_on);
3496
3497       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3498       return cost != INFTY;
3499     }
3500
3501   /* The induction variable elimination failed; just express the original
3502      giv.  If it is compared with an invariant, note that we cannot get
3503      rid of it.  */
3504   cost = get_computation_cost (data, use, cand, false, &depends_on);
3505
3506   cond = *use->op_p;
3507   if (TREE_CODE (cond) != SSA_NAME)
3508     {
3509       op = TREE_OPERAND (cond, 0);
3510       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
3511         op = TREE_OPERAND (cond, 1);
3512       if (TREE_CODE (op) == SSA_NAME)
3513         {
3514           op = get_iv (data, op)->base;
3515           fd_ivopts_data = data;
3516           walk_tree (&op, find_depends, &depends_on, NULL);
3517         }
3518     }
3519
3520   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
3521   return cost != INFTY;
3522 }
3523
3524 /* Checks whether it is possible to replace the final value of USE by
3525    a direct computation.  If so, the formula is stored to *VALUE.  */
3526
3527 static bool
3528 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
3529                          tree *value)
3530 {
3531   struct loop *loop = data->current_loop;
3532   edge exit;
3533   struct tree_niter_desc *niter;
3534
3535   exit = single_dom_exit (loop);
3536   if (!exit)
3537     return false;
3538
3539   gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3540                               bb_for_stmt (use->stmt)));
3541
3542   niter = niter_for_single_dom_exit (data);
3543   if (!niter
3544       || !zero_p (niter->may_be_zero))
3545     return false;
3546
3547   *value = iv_value (use->iv, niter->niter);
3548
3549   return true;
3550 }
3551
3552 /* Determines cost of replacing final value of USE using CAND.  */
3553
3554 static bool
3555 determine_use_iv_cost_outer (struct ivopts_data *data,
3556                              struct iv_use *use, struct iv_cand *cand)
3557 {
3558   bitmap depends_on;
3559   unsigned cost;
3560   edge exit;
3561   tree value = NULL_TREE;
3562   struct loop *loop = data->current_loop;
3563
3564   /* The simple case first -- if we need to express value of the preserved
3565      original biv, the cost is 0.  This also prevents us from counting the
3566      cost of increment twice -- once at this use and once in the cost of
3567      the candidate.  */
3568   if (cand->pos == IP_ORIGINAL
3569       && cand->incremented_at == use->stmt)
3570     {
3571       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3572       return true;
3573     }
3574
3575   if (!cand->iv)
3576     {
3577       if (!may_replace_final_value (data, use, &value))
3578         {
3579           set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3580           return false;
3581         }
3582
3583       depends_on = NULL;
3584       cost = force_var_cost (data, value, &depends_on);
3585
3586       cost /= AVG_LOOP_NITER (loop);
3587
3588       set_use_iv_cost (data, use, cand, cost, depends_on, value);
3589       return cost != INFTY;
3590     }
3591
3592   exit = single_dom_exit (loop);
3593   if (exit)
3594     {
3595       /* If there is just a single exit, we may use value of the candidate
3596          after we take it to determine the value of use.  */
3597       cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3598                                       last_stmt (exit->src));
3599       if (cost != INFTY)
3600         cost /= AVG_LOOP_NITER (loop);
3601     }
3602   else
3603     {
3604       /* Otherwise we just need to compute the iv.  */
3605       cost = get_computation_cost (data, use, cand, false, &depends_on);
3606     }
3607
3608   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3609
3610   return cost != INFTY;
3611 }
3612
3613 /* Determines cost of basing replacement of USE on CAND.  Returns false
3614    if USE cannot be based on CAND.  */
3615
3616 static bool
3617 determine_use_iv_cost (struct ivopts_data *data,
3618                        struct iv_use *use, struct iv_cand *cand)
3619 {
3620   switch (use->type)
3621     {
3622     case USE_NONLINEAR_EXPR:
3623       return determine_use_iv_cost_generic (data, use, cand);
3624
3625     case USE_OUTER:
3626       return determine_use_iv_cost_outer (data, use, cand);
3627
3628     case USE_ADDRESS:
3629       return determine_use_iv_cost_address (data, use, cand);
3630
3631     case USE_COMPARE:
3632       return determine_use_iv_cost_condition (data, use, cand);
3633
3634     default:
3635       gcc_unreachable ();
3636     }
3637 }
3638
3639 /* Determines costs of basing the use of the iv on an iv candidate.  */
3640
3641 static void
3642 determine_use_iv_costs (struct ivopts_data *data)
3643 {
3644   unsigned i, j;
3645   struct iv_use *use;
3646   struct iv_cand *cand;
3647   bitmap to_clear = BITMAP_ALLOC (NULL);
3648
3649   alloc_use_cost_map (data);
3650
3651   for (i = 0; i < n_iv_uses (data); i++)
3652     {
3653       use = iv_use (data, i);
3654
3655       if (data->consider_all_candidates)
3656         {
3657           for (j = 0; j < n_iv_cands (data); j++)
3658             {
3659               cand = iv_cand (data, j);
3660               determine_use_iv_cost (data, use, cand);
3661             }
3662         }
3663       else
3664         {
3665           bitmap_iterator bi;
3666
3667           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3668             {
3669               cand = iv_cand (data, j);
3670               if (!determine_use_iv_cost (data, use, cand))
3671                 bitmap_set_bit (to_clear, j);
3672             }
3673
3674           /* Remove the candidates for that the cost is infinite from
3675              the list of related candidates.  */
3676           bitmap_and_compl_into (use->related_cands, to_clear);
3677           bitmap_clear (to_clear);
3678         }
3679     }
3680
3681   BITMAP_FREE (to_clear);
3682
3683   if (dump_file && (dump_flags & TDF_DETAILS))
3684     {
3685       fprintf (dump_file, "Use-candidate costs:\n");
3686
3687       for (i = 0; i < n_iv_uses (data); i++)
3688         {
3689           use = iv_use (data, i);
3690
3691           fprintf (dump_file, "Use %d:\n", i);
3692           fprintf (dump_file, "  cand\tcost\tdepends on\n");
3693           for (j = 0; j < use->n_map_members; j++)
3694             {
3695               if (!use->cost_map[j].cand
3696                   || use->cost_map[j].cost == INFTY)
3697                 continue;
3698
3699               fprintf (dump_file, "  %d\t%d\t",
3700                        use->cost_map[j].cand->id,
3701                        use->cost_map[j].cost);
3702               if (use->cost_map[j].depends_on)
3703                 bitmap_print (dump_file,
3704                               use->cost_map[j].depends_on, "","");
3705               fprintf (dump_file, "\n");
3706             }
3707
3708           fprintf (dump_file, "\n");
3709         }
3710       fprintf (dump_file, "\n");
3711     }
3712 }
3713
3714 /* Determines cost of the candidate CAND.  */
3715
3716 static void
3717 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3718 {
3719   unsigned cost_base, cost_step;
3720   tree base;
3721
3722   if (!cand->iv)
3723     {
3724       cand->cost = 0;
3725       return;
3726     }
3727
3728   /* There are two costs associated with the candidate -- its increment
3729      and its initialization.  The second is almost negligible for any loop
3730      that rolls enough, so we take it just very little into account.  */
3731
3732   base = cand->iv->base;
3733   cost_base = force_var_cost (data, base, NULL);
3734   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3735
3736   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3737
3738   /* Prefer the original iv unless we may gain something by replacing it;
3739      this is not really relevant for artificial ivs created by other
3740      passes.  */
3741   if (cand->pos == IP_ORIGINAL
3742       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3743     cand->cost--;
3744
3745   /* Prefer not to insert statements into latch unless there are some
3746      already (so that we do not create unnecessary jumps).  */
3747   if (cand->pos == IP_END
3748       && empty_block_p (ip_end_pos (data->current_loop)))
3749     cand->cost++;
3750 }
3751
3752 /* Determines costs of computation of the candidates.  */
3753
3754 static void
3755 determine_iv_costs (struct ivopts_data *data)
3756 {
3757   unsigned i;
3758
3759   if (dump_file && (dump_flags & TDF_DETAILS))
3760     {
3761       fprintf (dump_file, "Candidate costs:\n");
3762       fprintf (dump_file, "  cand\tcost\n");
3763     }
3764
3765   for (i = 0; i < n_iv_cands (data); i++)
3766     {
3767       struct iv_cand *cand = iv_cand (data, i);
3768
3769       determine_iv_cost (data, cand);
3770
3771       if (dump_file && (dump_flags & TDF_DETAILS))
3772         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
3773     }
3774
3775 if (dump_file && (dump_flags & TDF_DETAILS))
3776       fprintf (dump_file, "\n");
3777 }
3778
3779 /* Calculates cost for having SIZE induction variables.  */
3780
3781 static unsigned
3782 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3783 {
3784   return global_cost_for_size (size,
3785                                loop_data (data->current_loop)->regs_used,
3786                                n_iv_uses (data));
3787 }
3788
3789 /* For each size of the induction variable set determine the penalty.  */
3790
3791 static void
3792 determine_set_costs (struct ivopts_data *data)
3793 {
3794   unsigned j, n;
3795   tree phi, op;
3796   struct loop *loop = data->current_loop;
3797   bitmap_iterator bi;
3798
3799   /* We use the following model (definitely improvable, especially the
3800      cost function -- TODO):
3801
3802      We estimate the number of registers available (using MD data), name it A.
3803
3804      We estimate the number of registers used by the loop, name it U.  This
3805      number is obtained as the number of loop phi nodes (not counting virtual
3806      registers and bivs) + the number of variables from outside of the loop.
3807
3808      We set a reserve R (free regs that are used for temporary computations,
3809      etc.).  For now the reserve is a constant 3.
3810
3811      Let I be the number of induction variables.
3812
3813      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3814         make a lot of ivs without a reason).
3815      -- if A - R < U + I <= A, the cost is I * PRES_COST
3816      -- if U + I > A, the cost is I * PRES_COST and
3817         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
3818
3819   if (dump_file && (dump_flags & TDF_DETAILS))
3820     {
3821       fprintf (dump_file, "Global costs:\n");
3822       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
3823       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
3824       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
3825       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
3826     }
3827
3828   n = 0;
3829   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3830     {
3831       op = PHI_RESULT (phi);
3832
3833       if (!is_gimple_reg (op))
3834         continue;
3835
3836       if (get_iv (data, op))
3837         continue;
3838
3839       n++;
3840     }
3841
3842   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3843     {
3844       struct version_info *info = ver_info (data, j);
3845
3846       if (info->inv_id && info->has_nonlin_use)
3847         n++;
3848     }
3849
3850   loop_data (loop)->regs_used = n;
3851   if (dump_file && (dump_flags & TDF_DETAILS))
3852     fprintf (dump_file, "  regs_used %d\n", n);
3853
3854   if (dump_file && (dump_flags & TDF_DETAILS))
3855     {
3856       fprintf (dump_file, "  cost for size:\n");
3857       fprintf (dump_file, "  ivs\tcost\n");
3858       for (j = 0; j <= 2 * target_avail_regs; j++)
3859         fprintf (dump_file, "  %d\t%d\n", j,
3860                  ivopts_global_cost_for_size (data, j));
3861       fprintf (dump_file, "\n");
3862     }
3863 }
3864
3865 /* Returns true if A is a cheaper cost pair than B.  */
3866
3867 static bool
3868 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3869 {
3870   if (!a)
3871     return false;
3872
3873   if (!b)
3874     return true;
3875
3876   if (a->cost < b->cost)
3877     return true;
3878
3879   if (a->cost > b->cost)
3880     return false;
3881
3882   /* In case the costs are the same, prefer the cheaper candidate.  */
3883   if (a->cand->cost < b->cand->cost)
3884     return true;
3885
3886   return false;
3887 }
3888
3889 /* Computes the cost field of IVS structure.  */
3890
3891 static void
3892 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3893 {
3894   unsigned cost = 0;
3895
3896   cost += ivs->cand_use_cost;
3897   cost += ivs->cand_cost;
3898   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3899
3900   ivs->cost = cost;
3901 }
3902
3903 /* Set USE not to be expressed by any candidate in IVS.  */
3904
3905 static void
3906 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3907                  struct iv_use *use)
3908 {
3909   unsigned uid = use->id, cid, iid;
3910   bitmap deps;
3911   struct cost_pair *cp;
3912   bitmap_iterator bi;
3913
3914   cp = ivs->cand_for_use[uid];
3915   if (!cp)
3916     return;
3917   cid = cp->cand->id;
3918
3919   ivs->bad_uses++;
3920   ivs->cand_for_use[uid] = NULL;
3921   ivs->n_cand_uses[cid]--;
3922
3923   if (ivs->n_cand_uses[cid] == 0)
3924     {
3925       bitmap_clear_bit (ivs->cands, cid);
3926       /* Do not count the pseudocandidates.  */
3927       if (cp->cand->iv)
3928         ivs->n_regs--;
3929       ivs->n_cands--;
3930       ivs->cand_cost -= cp->cand->cost;
3931     }
3932
3933   ivs->cand_use_cost -= cp->cost;
3934
3935   deps = cp->depends_on;
3936
3937   if (deps)
3938     {
3939       EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3940         {
3941           ivs->n_invariant_uses[iid]--;
3942           if (ivs->n_invariant_uses[iid] == 0)
3943             ivs->n_regs--;
3944         }
3945     }
3946
3947   iv_ca_recount_cost (data, ivs);
3948 }
3949
3950 /* Set cost pair for USE in set IVS to CP.  */
3951
3952 static void
3953 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3954               struct iv_use *use, struct cost_pair *cp)
3955 {
3956   unsigned uid = use->id, cid, iid;
3957   bitmap deps;
3958   bitmap_iterator bi;
3959
3960   if (ivs->cand_for_use[uid] == cp)
3961     return;
3962
3963   if (ivs->cand_for_use[uid])
3964     iv_ca_set_no_cp (data, ivs, use);
3965
3966   if (cp)
3967     {
3968       cid = cp->cand->id;
3969
3970       ivs->bad_uses--;
3971       ivs->cand_for_use[uid] = cp;
3972       ivs->n_cand_uses[cid]++;
3973       if (ivs->n_cand_uses[cid] == 1)
3974         {
3975           bitmap_set_bit (ivs->cands, cid);
3976           /* Do not count the pseudocandidates.  */
3977           if (cp->cand->iv)
3978             ivs->n_regs++;
3979           ivs->n_cands++;
3980           ivs->cand_cost += cp->cand->cost;
3981         }
3982
3983       ivs->cand_use_cost += cp->cost;
3984
3985       deps = cp->depends_on;
3986
3987       if (deps)
3988         {
3989           EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3990             {
3991               ivs->n_invariant_uses[iid]++;
3992               if (ivs->n_invariant_uses[iid] == 1)
3993                 ivs->n_regs++;
3994             }
3995         }
3996
3997       iv_ca_recount_cost (data, ivs);
3998     }
3999 }
4000
4001 /* Extend set IVS by expressing USE by some of the candidates in it
4002    if possible.  */
4003
4004 static void
4005 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4006                struct iv_use *use)
4007 {
4008   struct cost_pair *best_cp = NULL, *cp;
4009   bitmap_iterator bi;
4010   unsigned i;
4011
4012   gcc_assert (ivs->upto >= use->id);
4013
4014   if (ivs->upto == use->id)
4015     {
4016       ivs->upto++;
4017       ivs->bad_uses++;
4018     }
4019
4020   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4021     {
4022       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4023
4024       if (cheaper_cost_pair (cp, best_cp))
4025         best_cp = cp;
4026     }
4027
4028   iv_ca_set_cp (data, ivs, use, best_cp);
4029 }
4030
4031 /* Get cost for assignment IVS.  */
4032
4033 static unsigned
4034 iv_ca_cost (struct iv_ca *ivs)
4035 {
4036   return (ivs->bad_uses ? INFTY : ivs->cost);
4037 }
4038
4039 /* Returns true if all dependences of CP are among invariants in IVS.  */
4040
4041 static bool
4042 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4043 {
4044   unsigned i;
4045   bitmap_iterator bi;
4046
4047   if (!cp->depends_on)
4048     return true;
4049
4050   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4051     {
4052       if (ivs->n_invariant_uses[i] == 0)
4053         return false;
4054     }
4055
4056   return true;
4057 }
4058
4059 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4060    it before NEXT_CHANGE.  */
4061
4062 static struct iv_ca_delta *
4063 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4064                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4065 {
4066   struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4067
4068   change->use = use;
4069   change->old_cp = old_cp;
4070   change->new_cp = new_cp;
4071   change->next_change = next_change;
4072
4073   return change;
4074 }
4075
4076 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4077    are rewritten.  */
4078
4079 static struct iv_ca_delta *
4080 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4081 {
4082   struct iv_ca_delta *last;
4083
4084   if (!l2)
4085     return l1;
4086
4087   if (!l1)
4088     return l2;
4089
4090   for (last = l1; last->next_change; last = last->next_change)
4091     continue;
4092   last->next_change = l2;
4093
4094   return l1;
4095 }
4096
4097 /* Returns candidate by that USE is expressed in IVS.  */
4098
4099 static struct cost_pair *
4100 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4101 {
4102   return ivs->cand_for_use[use->id];
4103 }
4104
4105 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4106
4107 static struct iv_ca_delta *
4108 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4109 {
4110   struct iv_ca_delta *act, *next, *prev = NULL;
4111   struct cost_pair *tmp;
4112
4113   for (act = delta; act; act = next)
4114     {
4115       next = act->next_change;
4116       act->next_change = prev;
4117       prev = act;
4118
4119       tmp = act->old_cp;
4120       act->old_cp = act->new_cp;
4121       act->new_cp = tmp;
4122     }
4123
4124   return prev;
4125 }
4126
4127 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4128    reverted instead.  */
4129
4130 static void
4131 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4132                     struct iv_ca_delta *delta, bool forward)
4133 {
4134   struct cost_pair *from, *to;
4135   struct iv_ca_delta *act;
4136
4137   if (!forward)
4138     delta = iv_ca_delta_reverse (delta);
4139
4140   for (act = delta; act; act = act->next_change)
4141     {
4142       from = act->old_cp;
4143       to = act->new_cp;
4144       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4145       iv_ca_set_cp (data, ivs, act->use, to);
4146     }
4147
4148   if (!forward)
4149     iv_ca_delta_reverse (delta);
4150 }
4151
4152 /* Returns true if CAND is used in IVS.  */
4153
4154 static bool
4155 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4156 {
4157   return ivs->n_cand_uses[cand->id] > 0;
4158 }
4159
4160 /* Returns number of induction variable candidates in the set IVS.  */
4161
4162 static unsigned
4163 iv_ca_n_cands (struct iv_ca *ivs)
4164 {
4165   return ivs->n_cands;
4166 }
4167
4168 /* Free the list of changes DELTA.  */
4169
4170 static void
4171 iv_ca_delta_free (struct iv_ca_delta **delta)
4172 {
4173   struct iv_ca_delta *act, *next;
4174
4175   for (act = *delta; act; act = next)
4176     {
4177       next = act->next_change;
4178       free (act);
4179     }
4180
4181   *delta = NULL;
4182 }
4183
4184 /* Allocates new iv candidates assignment.  */
4185
4186 static struct iv_ca *
4187 iv_ca_new (struct ivopts_data *data)
4188 {
4189   struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4190
4191   nw->upto = 0;
4192   nw->bad_uses = 0;
4193   nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4194   nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4195   nw->cands = BITMAP_ALLOC (NULL);
4196   nw->n_cands = 0;
4197   nw->n_regs = 0;
4198   nw->cand_use_cost = 0;
4199   nw->cand_cost = 0;
4200   nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4201   nw->cost = 0;
4202
4203   return nw;
4204 }
4205
4206 /* Free memory occupied by the set IVS.  */
4207
4208 static void
4209 iv_ca_free (struct iv_ca **ivs)
4210 {
4211   free ((*ivs)->cand_for_use);
4212   free ((*ivs)->n_cand_uses);
4213   BITMAP_FREE ((*ivs)->cands);
4214   free ((*ivs)->n_invariant_uses);
4215   free (*ivs);
4216   *ivs = NULL;
4217 }
4218
4219 /* Dumps IVS to FILE.  */
4220
4221 static void
4222 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4223 {
4224   const char *pref = "  invariants ";
4225   unsigned i;
4226
4227   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4228   bitmap_print (file, ivs->cands, "  candidates ","\n");
4229
4230   for (i = 1; i <= data->max_inv_id; i++)
4231     if (ivs->n_invariant_uses[i])
4232       {
4233         fprintf (file, "%s%d", pref, i);
4234         pref = ", ";
4235       }
4236   fprintf (file, "\n");
4237 }
4238
4239 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4240    new set, and store differences in DELTA.  Number of induction variables
4241    in the new set is stored to N_IVS.  */
4242
4243 static unsigned
4244 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4245               struct iv_cand *cand, struct iv_ca_delta **delta,
4246               unsigned *n_ivs)
4247 {
4248   unsigned i, cost;
4249   struct iv_use *use;
4250   struct cost_pair *old_cp, *new_cp;
4251
4252   *delta = NULL;
4253   for (i = 0; i < ivs->upto; i++)
4254     {
4255       use = iv_use (data, i);
4256       old_cp = iv_ca_cand_for_use (ivs, use);
4257
4258       if (old_cp
4259           && old_cp->cand == cand)
4260         continue;
4261
4262       new_cp = get_use_iv_cost (data, use, cand);
4263       if (!new_cp)
4264         continue;
4265
4266       if (!iv_ca_has_deps (ivs, new_cp))
4267         continue;
4268
4269       if (!cheaper_cost_pair (new_cp, old_cp))
4270         continue;
4271
4272       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4273     }
4274
4275   iv_ca_delta_commit (data, ivs, *delta, true);
4276   cost = iv_ca_cost (ivs);
4277   if (n_ivs)
4278     *n_ivs = iv_ca_n_cands (ivs);
4279   iv_ca_delta_commit (data, ivs, *delta, false);
4280
4281   return cost;
4282 }
4283
4284 /* Try narrowing set IVS by removing CAND.  Return the cost of
4285    the new set and store the differences in DELTA.  */
4286
4287 static unsigned
4288 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4289               struct iv_cand *cand, struct iv_ca_delta **delta)
4290 {
4291   unsigned i, ci;
4292   struct iv_use *use;
4293   struct cost_pair *old_cp, *new_cp, *cp;
4294   bitmap_iterator bi;
4295   struct iv_cand *cnd;
4296   unsigned cost;
4297
4298   *delta = NULL;
4299   for (i = 0; i < n_iv_uses (data); i++)
4300     {
4301       use = iv_use (data, i);
4302
4303       old_cp = iv_ca_cand_for_use (ivs, use);
4304       if (old_cp->cand != cand)
4305         continue;
4306
4307       new_cp = NULL;
4308
4309       if (data->consider_all_candidates)
4310         {
4311           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4312             {
4313               if (ci == cand->id)
4314                 continue;
4315
4316               cnd = iv_cand (data, ci);
4317
4318               cp = get_use_iv_cost (data, use, cnd);
4319               if (!cp)
4320                 continue;
4321               if (!iv_ca_has_deps (ivs, cp))
4322                 continue;
4323
4324               if (!cheaper_cost_pair (cp, new_cp))
4325                 continue;
4326
4327               new_cp = cp;
4328             }
4329         }
4330       else
4331         {
4332           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4333             {
4334               if (ci == cand->id)
4335                 continue;
4336
4337               cnd = iv_cand (data, ci);
4338
4339               cp = get_use_iv_cost (data, use, cnd);
4340               if (!cp)
4341                 continue;
4342               if (!iv_ca_has_deps (ivs, cp))
4343                 continue;
4344
4345               if (!cheaper_cost_pair (cp, new_cp))
4346                 continue;
4347
4348               new_cp = cp;
4349             }
4350         }
4351
4352       if (!new_cp)
4353         {
4354           iv_ca_delta_free (delta);
4355           return INFTY;
4356         }
4357
4358       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4359     }
4360
4361   iv_ca_delta_commit (data, ivs, *delta, true);
4362   cost = iv_ca_cost (ivs);
4363   iv_ca_delta_commit (data, ivs, *delta, false);
4364
4365   return cost;
4366 }
4367
4368 /* Try optimizing the set of candidates IVS by removing candidates different
4369    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4370    differences in DELTA.  */
4371
4372 static unsigned
4373 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4374              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4375 {
4376   bitmap_iterator bi;
4377   struct iv_ca_delta *act_delta, *best_delta;
4378   unsigned i, best_cost, acost;
4379   struct iv_cand *cand;
4380
4381   best_delta = NULL;
4382   best_cost = iv_ca_cost (ivs);
4383
4384   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4385     {
4386       cand = iv_cand (data, i);
4387
4388       if (cand == except_cand)
4389         continue;
4390
4391       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4392
4393       if (acost < best_cost)
4394         {
4395           best_cost = acost;
4396           iv_ca_delta_free (&best_delta);
4397           best_delta = act_delta;
4398         }
4399       else
4400         iv_ca_delta_free (&act_delta);
4401     }
4402
4403   if (!best_delta)
4404     {
4405       *delta = NULL;
4406       return best_cost;
4407     }
4408
4409   /* Recurse to possibly remove other unnecessary ivs.  */
4410   iv_ca_delta_commit (data, ivs, best_delta, true);
4411   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4412   iv_ca_delta_commit (data, ivs, best_delta, false);
4413   *delta = iv_ca_delta_join (best_delta, *delta);
4414   return best_cost;
4415 }
4416
4417 /* Tries to extend the sets IVS in the best possible way in order
4418    to express the USE.  */
4419
4420 static bool
4421 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4422                   struct iv_use *use)
4423 {
4424   unsigned best_cost, act_cost;
4425   unsigned i;
4426   bitmap_iterator bi;
4427   struct iv_cand *cand;
4428   struct iv_ca_delta *best_delta = NULL, *act_delta;
4429   struct cost_pair *cp;
4430
4431   iv_ca_add_use (data, ivs, use);
4432   best_cost = iv_ca_cost (ivs);
4433
4434   cp = iv_ca_cand_for_use (ivs, use);
4435   if (cp)
4436     {
4437       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4438       iv_ca_set_no_cp (data, ivs, use);
4439     }
4440
4441   /* First try important candidates.  Only if it fails, try the specific ones.
4442      Rationale -- in loops with many variables the best choice often is to use
4443      just one generic biv.  If we added here many ivs specific to the uses,
4444      the optimization algorithm later would be likely to get stuck in a local
4445      minimum, thus causing us to create too many ivs.  The approach from
4446      few ivs to more seems more likely to be successful -- starting from few
4447      ivs, replacing an expensive use by a specific iv should always be a
4448      win.  */
4449   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4450     {
4451       cand = iv_cand (data, i);
4452
4453       if (iv_ca_cand_used_p (ivs, cand))
4454         continue;
4455
4456       cp = get_use_iv_cost (data, use, cand);
4457       if (!cp)
4458         continue;
4459
4460       iv_ca_set_cp (data, ivs, use, cp);
4461       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4462       iv_ca_set_no_cp (data, ivs, use);
4463       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4464
4465       if (act_cost < best_cost)
4466         {
4467           best_cost = act_cost;
4468
4469           iv_ca_delta_free (&best_delta);
4470           best_delta = act_delta;
4471         }
4472       else
4473         iv_ca_delta_free (&act_delta);
4474     }
4475
4476   if (best_cost == INFTY)
4477     {
4478       for (i = 0; i < use->n_map_members; i++)
4479         {
4480           cp = use->cost_map + i;
4481           cand = cp->cand;
4482           if (!cand)
4483             continue;
4484
4485           /* Already tried this.  */
4486           if (cand->important)
4487             continue;
4488
4489           if (iv_ca_cand_used_p (ivs, cand))
4490             continue;
4491
4492           act_delta = NULL;
4493           iv_ca_set_cp (data, ivs, use, cp);
4494           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4495           iv_ca_set_no_cp (data, ivs, use);
4496           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4497                                        cp, act_delta);
4498
4499           if (act_cost < best_cost)
4500             {
4501               best_cost = act_cost;
4502
4503               if (best_delta)
4504                 iv_ca_delta_free (&best_delta);
4505               best_delta = act_delta;
4506             }
4507           else
4508             iv_ca_delta_free (&act_delta);
4509         }
4510     }
4511
4512   iv_ca_delta_commit (data, ivs, best_delta, true);
4513   iv_ca_delta_free (&best_delta);
4514
4515   return (best_cost != INFTY);
4516 }
4517
4518 /* Finds an initial assignment of candidates to uses.  */
4519
4520 static struct iv_ca *
4521 get_initial_solution (struct ivopts_data *data)
4522 {
4523   struct iv_ca *ivs = iv_ca_new (data);
4524   unsigned i;
4525
4526   for (i = 0; i < n_iv_uses (data); i++)
4527     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4528       {
4529         iv_ca_free (&ivs);
4530         return NULL;
4531       }
4532
4533   return ivs;
4534 }
4535
4536 /* Tries to improve set of induction variables IVS.  */
4537
4538 static bool
4539 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4540 {
4541   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4542   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4543   struct iv_cand *cand;
4544
4545   /* Try extending the set of induction variables by one.  */
4546   for (i = 0; i < n_iv_cands (data); i++)
4547     {
4548       cand = iv_cand (data, i);
4549
4550       if (iv_ca_cand_used_p (ivs, cand))
4551         continue;
4552
4553       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4554       if (!act_delta)
4555         continue;
4556
4557       /* If we successfully added the candidate and the set is small enough,
4558          try optimizing it by removing other candidates.  */
4559       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4560         {
4561           iv_ca_delta_commit (data, ivs, act_delta, true);
4562           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4563           iv_ca_delta_commit (data, ivs, act_delta, false);
4564           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4565         }
4566
4567       if (acost < best_cost)
4568         {
4569           best_cost = acost;
4570           iv_ca_delta_free (&best_delta);
4571           best_delta = act_delta;
4572         }
4573       else
4574         iv_ca_delta_free (&act_delta);
4575     }
4576
4577   if (!best_delta)
4578     {
4579       /* Try removing the candidates from the set instead.  */
4580       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4581
4582       /* Nothing more we can do.  */
4583       if (!best_delta)
4584         return false;
4585     }
4586
4587   iv_ca_delta_commit (data, ivs, best_delta, true);
4588   gcc_assert (best_cost == iv_ca_cost (ivs));
4589   iv_ca_delta_free (&best_delta);
4590   return true;
4591 }
4592
4593 /* Attempts to find the optimal set of induction variables.  We do simple
4594    greedy heuristic -- we try to replace at most one candidate in the selected
4595    solution and remove the unused ivs while this improves the cost.  */
4596
4597 static struct iv_ca *
4598 find_optimal_iv_set (struct ivopts_data *data)
4599 {
4600   unsigned i;
4601   struct iv_ca *set;
4602   struct iv_use *use;
4603
4604   /* Get the initial solution.  */
4605   set = get_initial_solution (data);
4606   if (!set)
4607     {
4608       if (dump_file && (dump_flags & TDF_DETAILS))
4609         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4610       return NULL;
4611     }
4612
4613   if (dump_file && (dump_flags & TDF_DETAILS))
4614     {
4615       fprintf (dump_file, "Initial set of candidates:\n");
4616       iv_ca_dump (data, dump_file, set);
4617     }
4618
4619   while (try_improve_iv_set (data, set))
4620     {
4621       if (dump_file && (dump_flags & TDF_DETAILS))
4622         {
4623           fprintf (dump_file, "Improved to:\n");
4624           iv_ca_dump (data, dump_file, set);
4625         }
4626     }
4627
4628   if (dump_file && (dump_flags & TDF_DETAILS))
4629     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4630
4631   for (i = 0; i < n_iv_uses (data); i++)
4632     {
4633       use = iv_use (data, i);
4634       use->selected = iv_ca_cand_for_use (set, use)->cand;
4635     }
4636
4637   return set;
4638 }
4639
4640 /* Creates a new induction variable corresponding to CAND.  */
4641
4642 static void
4643 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4644 {
4645   block_stmt_iterator incr_pos;
4646   tree base;
4647   bool after = false;
4648
4649   if (!cand->iv)
4650     return;
4651
4652   switch (cand->pos)
4653     {
4654     case IP_NORMAL:
4655       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4656       break;
4657
4658     case IP_END:
4659       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4660       after = true;
4661       break;
4662
4663     case IP_ORIGINAL:
4664       /* Mark that the iv is preserved.  */
4665       name_info (data, cand->var_before)->preserve_biv = true;
4666       name_info (data, cand->var_after)->preserve_biv = true;
4667
4668       /* Rewrite the increment so that it uses var_before directly.  */
4669       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4670
4671       return;
4672     }
4673
4674   gimple_add_tmp_var (cand->var_before);
4675   add_referenced_tmp_var (cand->var_before);
4676
4677   base = unshare_expr (cand->iv->base);
4678
4679   create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4680              &incr_pos, after, &cand->var_before, &cand->var_after);
4681 }
4682
4683 /* Creates new induction variables described in SET.  */
4684
4685 static void
4686 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4687 {
4688   unsigned i;
4689   struct iv_cand *cand;
4690   bitmap_iterator bi;
4691
4692   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4693     {
4694       cand = iv_cand (data, i);
4695       create_new_iv (data, cand);
4696     }
4697 }
4698
4699 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4700    is true, remove also the ssa name defined by the statement.  */
4701
4702 static void
4703 remove_statement (tree stmt, bool including_defined_name)
4704 {
4705   if (TREE_CODE (stmt) == PHI_NODE)
4706     {
4707       if (!including_defined_name)
4708         {
4709           /* Prevent the ssa name defined by the statement from being removed.  */
4710           SET_PHI_RESULT (stmt, NULL);
4711         }
4712       remove_phi_node (stmt, NULL_TREE);
4713     }
4714   else
4715     {
4716       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4717
4718       bsi_remove (&bsi);
4719     }
4720 }
4721
4722 /* Rewrites USE (definition of iv used in a nonlinear expression)
4723    using candidate CAND.  */
4724
4725 static void
4726 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4727                             struct iv_use *use, struct iv_cand *cand)
4728 {
4729   tree comp;
4730   tree op, stmts, tgt, ass;
4731   block_stmt_iterator bsi, pbsi;
4732
4733   /* An important special case -- if we are asked to express value of
4734      the original iv by itself, just exit; there is no need to
4735      introduce a new computation (that might also need casting the
4736      variable to unsigned and back).  */
4737   if (cand->pos == IP_ORIGINAL
4738       && TREE_CODE (use->stmt) == MODIFY_EXPR
4739       && TREE_OPERAND (use->stmt, 0) == cand->var_after)
4740     {
4741       op = TREE_OPERAND (use->stmt, 1);
4742
4743       /* Be a bit careful.  In case variable is expressed in some
4744          complicated way, rewrite it so that we may get rid of this
4745          complicated expression.  */
4746       if ((TREE_CODE (op) == PLUS_EXPR
4747            || TREE_CODE (op) == MINUS_EXPR)
4748           && TREE_OPERAND (op, 0) == cand->var_before
4749           && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
4750         return;
4751     }
4752
4753   comp = unshare_expr (get_computation (data->current_loop,
4754                                         use, cand));
4755   switch (TREE_CODE (use->stmt))
4756     {
4757     case PHI_NODE:
4758       tgt = PHI_RESULT (use->stmt);
4759
4760       /* If we should keep the biv, do not replace it.  */
4761       if (name_info (data, tgt)->preserve_biv)
4762         return;
4763
4764       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4765       while (!bsi_end_p (pbsi)
4766              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4767         {
4768           bsi = pbsi;
4769           bsi_next (&pbsi);
4770         }
4771       break;
4772
4773     case MODIFY_EXPR:
4774       tgt = TREE_OPERAND (use->stmt, 0);
4775       bsi = bsi_for_stmt (use->stmt);
4776       break;
4777
4778     default:
4779       gcc_unreachable ();
4780     }
4781
4782   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4783
4784   if (TREE_CODE (use->stmt) == PHI_NODE)
4785     {
4786       if (stmts)
4787         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4788       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4789       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4790       remove_statement (use->stmt, false);
4791       SSA_NAME_DEF_STMT (tgt) = ass;
4792     }
4793   else
4794     {
4795       if (stmts)
4796         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4797       TREE_OPERAND (use->stmt, 1) = op;
4798     }
4799 }
4800
4801 /* Replaces ssa name in index IDX by its basic variable.  Callback for
4802    for_each_index.  */
4803
4804 static bool
4805 idx_remove_ssa_names (tree base, tree *idx,
4806                       void *data ATTRIBUTE_UNUSED)
4807 {
4808   tree *op;
4809
4810   if (TREE_CODE (*idx) == SSA_NAME)
4811     *idx = SSA_NAME_VAR (*idx);
4812
4813   if (TREE_CODE (base) == ARRAY_REF)
4814     {
4815       op = &TREE_OPERAND (base, 2);
4816       if (*op
4817           && TREE_CODE (*op) == SSA_NAME)
4818         *op = SSA_NAME_VAR (*op);
4819       op = &TREE_OPERAND (base, 3);
4820       if (*op
4821           && TREE_CODE (*op) == SSA_NAME)
4822         *op = SSA_NAME_VAR (*op);
4823     }
4824
4825   return true;
4826 }
4827
4828 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
4829
4830 static tree
4831 unshare_and_remove_ssa_names (tree ref)
4832 {
4833   ref = unshare_expr (ref);
4834   for_each_index (&ref, idx_remove_ssa_names, NULL);
4835
4836   return ref;
4837 }
4838
4839 /* Rewrites base of memory access OP with expression WITH in statement
4840    pointed to by BSI.  */
4841
4842 static void
4843 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4844 {
4845   tree bvar, var, new_name, copy, name;
4846   tree orig;
4847
4848   var = bvar = get_base_address (*op);
4849
4850   if (!var || TREE_CODE (with) != SSA_NAME)
4851     goto do_rewrite;
4852
4853   gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4854   gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4855   if (TREE_CODE (var) == INDIRECT_REF)
4856     var = TREE_OPERAND (var, 0);
4857   if (TREE_CODE (var) == SSA_NAME)
4858     {
4859       name = var;
4860       var = SSA_NAME_VAR (var);
4861     }
4862   else if (DECL_P (var))
4863     name = NULL_TREE;
4864   else
4865     goto do_rewrite;
4866
4867   /* We need to add a memory tag for the variable.  But we do not want
4868      to add it to the temporary used for the computations, since this leads
4869      to problems in redundancy elimination when there are common parts
4870      in two computations referring to the different arrays.  So we copy
4871      the variable to a new temporary.  */
4872   copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4873
4874   if (name)
4875     new_name = duplicate_ssa_name (name, copy);
4876   else
4877     {
4878       tree tag = var_ann (var)->type_mem_tag;
4879       tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
4880       add_referenced_tmp_var (new_ptr);
4881       if (tag)
4882         var_ann (new_ptr)->type_mem_tag = tag;
4883       else
4884         add_type_alias (new_ptr, var);
4885       new_name = make_ssa_name (new_ptr, copy);
4886     }
4887
4888   TREE_OPERAND (copy, 0) = new_name;
4889   update_stmt (copy);
4890   bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4891   with = new_name;
4892
4893 do_rewrite:
4894
4895   orig = NULL_TREE;
4896   gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4897   gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4898
4899   if (TREE_CODE (*op) == INDIRECT_REF)
4900     orig = REF_ORIGINAL (*op);
4901   if (!orig)
4902     orig = unshare_and_remove_ssa_names (*op);
4903
4904   *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4905
4906   /* Record the original reference, for purposes of alias analysis.  */
4907   REF_ORIGINAL (*op) = orig;
4908
4909   /* Virtual operands in the original statement may have to be renamed
4910      because of the replacement.  */
4911   mark_new_vars_to_rename (bsi_stmt (*bsi));
4912 }
4913
4914 /* Rewrites USE (address that is an iv) using candidate CAND.  */
4915
4916 static void
4917 rewrite_use_address (struct ivopts_data *data,
4918                      struct iv_use *use, struct iv_cand *cand)
4919 {
4920   tree comp = unshare_expr (get_computation (data->current_loop,
4921                                              use, cand));
4922   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4923   tree stmts;
4924   tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4925
4926   if (stmts)
4927     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4928
4929   rewrite_address_base (&bsi, use->op_p, op);
4930 }
4931
4932 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4933    candidate CAND.  */
4934
4935 static void
4936 rewrite_use_compare (struct ivopts_data *data,
4937                      struct iv_use *use, struct iv_cand *cand)
4938 {
4939   tree comp;
4940   tree *op_p, cond, op, stmts, bound;
4941   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4942   enum tree_code compare;
4943   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
4944
4945   bound = cp->value;
4946   if (bound)
4947     {
4948       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
4949       tree var_type = TREE_TYPE (var);
4950
4951       compare = iv_elimination_compare (data, use);
4952       bound = fold_convert (var_type, bound);
4953       op = force_gimple_operand (unshare_expr (bound), &stmts,
4954                                  true, NULL_TREE);
4955
4956       if (stmts)
4957         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4958
4959       *use->op_p = build2 (compare, boolean_type_node, var, op);
4960       update_stmt (use->stmt);
4961       return;
4962     }
4963
4964   /* The induction variable elimination failed; just express the original
4965      giv.  */
4966   comp = unshare_expr (get_computation (data->current_loop, use, cand));
4967
4968   cond = *use->op_p;
4969   op_p = &TREE_OPERAND (cond, 0);
4970   if (TREE_CODE (*op_p) != SSA_NAME
4971       || zero_p (get_iv (data, *op_p)->step))
4972     op_p = &TREE_OPERAND (cond, 1);
4973
4974   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4975   if (stmts)
4976     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4977
4978   *op_p = op;
4979 }
4980
4981 /* Ensure that operand *OP_P may be used at the end of EXIT without
4982    violating loop closed ssa form.  */
4983
4984 static void
4985 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4986 {
4987   basic_block def_bb;
4988   struct loop *def_loop;
4989   tree phi, use;
4990
4991   use = USE_FROM_PTR (op_p);
4992   if (TREE_CODE (use) != SSA_NAME)
4993     return;
4994
4995   def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4996   if (!def_bb)
4997     return;
4998
4999   def_loop = def_bb->loop_father;
5000   if (flow_bb_inside_loop_p (def_loop, exit->dest))
5001     return;
5002
5003   /* Try finding a phi node that copies the value out of the loop.  */
5004   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5005     if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5006       break;
5007
5008   if (!phi)
5009     {
5010       /* Create such a phi node.  */
5011       tree new_name = duplicate_ssa_name (use, NULL);
5012
5013       phi = create_phi_node (new_name, exit->dest);
5014       SSA_NAME_DEF_STMT (new_name) = phi;
5015       add_phi_arg (phi, use, exit);
5016     }
5017
5018   SET_USE (op_p, PHI_RESULT (phi));
5019 }
5020
5021 /* Ensure that operands of STMT may be used at the end of EXIT without
5022    violating loop closed ssa form.  */
5023
5024 static void
5025 protect_loop_closed_ssa_form (edge exit, tree stmt)
5026 {
5027   use_optype uses;
5028   vuse_optype vuses;
5029   v_may_def_optype v_may_defs;
5030   unsigned i;
5031
5032   uses = STMT_USE_OPS (stmt);
5033   for (i = 0; i < NUM_USES (uses); i++)
5034     protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
5035
5036   vuses = STMT_VUSE_OPS (stmt);
5037   for (i = 0; i < NUM_VUSES (vuses); i++)
5038     protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
5039
5040   v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5041   for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
5042     protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
5043 }
5044
5045 /* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
5046    so that they are emitted on the correct place, and so that the loop closed
5047    ssa form is preserved.  */
5048
5049 static void
5050 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5051 {
5052   tree_stmt_iterator tsi;
5053   block_stmt_iterator bsi;
5054   tree phi, stmt, def, next;
5055
5056   if (!single_pred_p (exit->dest))
5057     split_loop_exit_edge (exit);
5058
5059   /* Ensure there is label in exit->dest, so that we can
5060      insert after it.  */
5061   tree_block_label (exit->dest);
5062   bsi = bsi_after_labels (exit->dest);
5063
5064   if (TREE_CODE (stmts) == STATEMENT_LIST)
5065     {
5066       for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5067         {
5068           bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5069           protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5070         }
5071     }
5072   else
5073     {
5074       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5075       protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5076     }
5077
5078   if (!op)
5079     return;
5080
5081   for (phi = phi_nodes (exit->dest); phi; phi = next)
5082     {
5083       next = PHI_CHAIN (phi);
5084
5085       if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5086         {
5087           def = PHI_RESULT (phi);
5088           remove_statement (phi, false);
5089           stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5090                         def, op);
5091           SSA_NAME_DEF_STMT (def) = stmt;
5092           bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5093         }
5094     }
5095 }
5096
5097 /* Rewrites the final value of USE (that is only needed outside of the loop)
5098    using candidate CAND.  */
5099
5100 static void
5101 rewrite_use_outer (struct ivopts_data *data,
5102                    struct iv_use *use, struct iv_cand *cand)
5103 {
5104   edge exit;
5105   tree value, op, stmts, tgt;
5106   tree phi;
5107
5108   switch (TREE_CODE (use->stmt))
5109     {
5110     case PHI_NODE:
5111       tgt = PHI_RESULT (use->stmt);
5112       break;
5113     case MODIFY_EXPR:
5114       tgt = TREE_OPERAND (use->stmt, 0);
5115       break;
5116     default:
5117       gcc_unreachable ();
5118     }
5119
5120   exit = single_dom_exit (data->current_loop);
5121
5122   if (exit)
5123     {
5124       if (!cand->iv)
5125         {
5126           struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5127           value = cp->value;
5128         }
5129       else
5130         value = get_computation_at (data->current_loop,
5131                                     use, cand, last_stmt (exit->src));
5132
5133       value = unshare_expr (value);
5134       op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5135
5136       /* If we will preserve the iv anyway and we would need to perform
5137          some computation to replace the final value, do nothing.  */
5138       if (stmts && name_info (data, tgt)->preserve_biv)
5139         return;
5140
5141       for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5142         {
5143           use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5144
5145           if (USE_FROM_PTR (use_p) == tgt)
5146             SET_USE (use_p, op);
5147         }
5148
5149       if (stmts)
5150         compute_phi_arg_on_exit (exit, stmts, op);
5151
5152       /* Enable removal of the statement.  We cannot remove it directly,
5153          since we may still need the aliasing information attached to the
5154          ssa name defined by it.  */
5155       name_info (data, tgt)->iv->have_use_for = false;
5156       return;
5157     }
5158
5159   /* If the variable is going to be preserved anyway, there is nothing to
5160      do.  */
5161   if (name_info (data, tgt)->preserve_biv)
5162     return;
5163
5164   /* Otherwise we just need to compute the iv.  */
5165   rewrite_use_nonlinear_expr (data, use, cand);
5166 }
5167
5168 /* Rewrites USE using candidate CAND.  */
5169
5170 static void
5171 rewrite_use (struct ivopts_data *data,
5172              struct iv_use *use, struct iv_cand *cand)
5173 {
5174   switch (use->type)
5175     {
5176       case USE_NONLINEAR_EXPR:
5177         rewrite_use_nonlinear_expr (data, use, cand);
5178         break;
5179
5180       case USE_OUTER:
5181         rewrite_use_outer (data, use, cand);
5182         break;
5183
5184       case USE_ADDRESS:
5185         rewrite_use_address (data, use, cand);
5186         break;
5187
5188       case USE_COMPARE:
5189         rewrite_use_compare (data, use, cand);
5190         break;
5191
5192       default:
5193         gcc_unreachable ();
5194     }
5195   update_stmt (use->stmt);
5196 }
5197
5198 /* Rewrite the uses using the selected induction variables.  */
5199
5200 static void
5201 rewrite_uses (struct ivopts_data *data)
5202 {
5203   unsigned i;
5204   struct iv_cand *cand;
5205   struct iv_use *use;
5206
5207   for (i = 0; i < n_iv_uses (data); i++)
5208     {
5209       use = iv_use (data, i);
5210       cand = use->selected;
5211       gcc_assert (cand);
5212
5213       rewrite_use (data, use, cand);
5214     }
5215 }
5216
5217 /* Removes the ivs that are not used after rewriting.  */
5218
5219 static void
5220 remove_unused_ivs (struct ivopts_data *data)
5221 {
5222   unsigned j;
5223   bitmap_iterator bi;
5224
5225   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5226     {
5227       struct version_info *info;
5228
5229       info = ver_info (data, j);
5230       if (info->iv
5231           && !zero_p (info->iv->step)
5232           && !info->inv_id
5233           && !info->iv->have_use_for
5234           && !info->preserve_biv)
5235         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5236     }
5237 }
5238
5239 /* Frees data allocated by the optimization of a single loop.  */
5240
5241 static void
5242 free_loop_data (struct ivopts_data *data)
5243 {
5244   unsigned i, j;
5245   bitmap_iterator bi;
5246   tree obj;
5247
5248   htab_empty (data->niters);
5249
5250   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5251     {
5252       struct version_info *info;
5253
5254       info = ver_info (data, i);
5255       if (info->iv)
5256         free (info->iv);
5257       info->iv = NULL;
5258       info->has_nonlin_use = false;
5259       info->preserve_biv = false;
5260       info->inv_id = 0;
5261     }
5262   bitmap_clear (data->relevant);
5263   bitmap_clear (data->important_candidates);
5264
5265   for (i = 0; i < n_iv_uses (data); i++)
5266     {
5267       struct iv_use *use = iv_use (data, i);
5268
5269       free (use->iv);
5270       BITMAP_FREE (use->related_cands);
5271       for (j = 0; j < use->n_map_members; j++)
5272         if (use->cost_map[j].depends_on)
5273           BITMAP_FREE (use->cost_map[j].depends_on);
5274       free (use->cost_map);
5275       free (use);
5276     }
5277   VEC_truncate (iv_use_p, data->iv_uses, 0);
5278
5279   for (i = 0; i < n_iv_cands (data); i++)
5280     {
5281       struct iv_cand *cand = iv_cand (data, i);
5282
5283       if (cand->iv)
5284         free (cand->iv);
5285       free (cand);
5286     }
5287   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5288
5289   if (data->version_info_size < num_ssa_names)
5290     {
5291       data->version_info_size = 2 * num_ssa_names;
5292       free (data->version_info);
5293       data->version_info = xcalloc (data->version_info_size,
5294                                     sizeof (struct version_info));
5295     }
5296
5297   data->max_inv_id = 0;
5298
5299   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5300     SET_DECL_RTL (obj, NULL_RTX);
5301
5302   VEC_truncate (tree, decl_rtl_to_reset, 0);
5303 }
5304
5305 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5306    loop tree.  */
5307
5308 static void
5309 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5310 {
5311   unsigned i;
5312
5313   for (i = 1; i < loops->num; i++)
5314     if (loops->parray[i])
5315       {
5316         free (loops->parray[i]->aux);
5317         loops->parray[i]->aux = NULL;
5318       }
5319
5320   free_loop_data (data);
5321   free (data->version_info);
5322   BITMAP_FREE (data->relevant);
5323   BITMAP_FREE (data->important_candidates);
5324   htab_delete (data->niters);
5325
5326   VEC_free (tree, heap, decl_rtl_to_reset);
5327   VEC_free (iv_use_p, heap, data->iv_uses);
5328   VEC_free (iv_cand_p, heap, data->iv_candidates);
5329 }
5330
5331 /* Optimizes the LOOP.  Returns true if anything changed.  */
5332
5333 static bool
5334 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5335 {
5336   bool changed = false;
5337   struct iv_ca *iv_ca;
5338   edge exit;
5339
5340   data->current_loop = loop;
5341
5342   if (dump_file && (dump_flags & TDF_DETAILS))
5343     {
5344       fprintf (dump_file, "Processing loop %d\n", loop->num);
5345
5346       exit = single_dom_exit (loop);
5347       if (exit)
5348         {
5349           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5350                    exit->src->index, exit->dest->index);
5351           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5352           fprintf (dump_file, "\n");
5353         }
5354
5355       fprintf (dump_file, "\n");
5356     }
5357
5358   /* For each ssa name determines whether it behaves as an induction variable
5359      in some loop.  */
5360   if (!find_induction_variables (data))
5361     goto finish;
5362
5363   /* Finds interesting uses (item 1).  */
5364   find_interesting_uses (data);
5365   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5366     goto finish;
5367
5368   /* Finds candidates for the induction variables (item 2).  */
5369   find_iv_candidates (data);
5370
5371   /* Calculates the costs (item 3, part 1).  */
5372   determine_use_iv_costs (data);
5373   determine_iv_costs (data);
5374   determine_set_costs (data);
5375
5376   /* Find the optimal set of induction variables (item 3, part 2).  */
5377   iv_ca = find_optimal_iv_set (data);
5378   if (!iv_ca)
5379     goto finish;
5380   changed = true;
5381
5382   /* Create the new induction variables (item 4, part 1).  */
5383   create_new_ivs (data, iv_ca);
5384   iv_ca_free (&iv_ca);
5385
5386   /* Rewrite the uses (item 4, part 2).  */
5387   rewrite_uses (data);
5388
5389   /* Remove the ivs that are unused after rewriting.  */
5390   remove_unused_ivs (data);
5391
5392   /* We have changed the structure of induction variables; it might happen
5393      that definitions in the scev database refer to some of them that were
5394      eliminated.  */
5395   scev_reset ();
5396
5397 finish:
5398   free_loop_data (data);
5399
5400   return changed;
5401 }
5402
5403 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5404
5405 void
5406 tree_ssa_iv_optimize (struct loops *loops)
5407 {
5408   struct loop *loop;
5409   struct ivopts_data data;
5410
5411   tree_ssa_iv_optimize_init (loops, &data);
5412
5413   /* Optimize the loops starting with the innermost ones.  */
5414   loop = loops->tree_root;
5415   while (loop->inner)
5416     loop = loop->inner;
5417
5418   /* Scan the loops, inner ones first.  */
5419   while (loop != loops->tree_root)
5420     {
5421       if (dump_file && (dump_flags & TDF_DETAILS))
5422         flow_loop_dump (loop, dump_file, NULL, 1);
5423
5424       tree_ssa_iv_optimize_loop (&data, loop);
5425
5426       if (loop->next)
5427         {
5428           loop = loop->next;
5429           while (loop->inner)
5430             loop = loop->inner;
5431         }
5432       else
5433         loop = loop->outer;
5434     }
5435
5436   /* FIXME.  IV opts introduces new aliases and call-clobbered
5437      variables, which need to be renamed.  However, when we call the
5438      renamer, not all statements will be scanned for operands.  In
5439      particular, the newly introduced aliases may appear in statements
5440      that are considered "unmodified", so the renamer will not get a
5441      chance to rename those operands.
5442
5443      Work around this problem by forcing an operand re-scan on every
5444      statement.  This will not be necessary once the new operand
5445      scanner is implemented.  */
5446   if (need_ssa_update_p ())
5447     {
5448       basic_block bb;
5449       block_stmt_iterator si;
5450       FOR_EACH_BB (bb)
5451         for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5452           update_stmt (bsi_stmt (si));
5453     }
5454
5455   rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
5456   tree_ssa_iv_optimize_finalize (loops, &data);
5457 }