gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "pointer-set.h"
  87 #include "hashtab.h"
  88 #include "tree-chrec.h"
  89 #include "tree-scalar-evolution.h"
  90 #include "cfgloop.h"
  91 #include "params.h"
  92 #include "langhooks.h"
  93 #include "tree-affine.h"
  94 #include "target.h"
  95
  96 /* The infinite cost.  */
  97 #define INFTY 10000000
  98
  99 /* The expected number of loop iterations.  TODO -- use profiling instead of
 100    this.  */
 101 #define AVG_LOOP_NITER(LOOP) 5
 102
 103
 104 /* Representation of the induction variable.  */
 105 struct iv
 106 {
 107   tree base;            /* Initial value of the iv.  */
 108   tree base_object;     /* A memory object to that the induction variable points.  */
 109   tree step;            /* Step of the iv (constant only).  */
 110   tree ssa_name;        /* The ssa name with the value.  */
 111   bool biv_p;           /* Is it a biv?  */
 112   bool have_use_for;    /* Do we already have a use for it?  */
 113   unsigned use_id;      /* The identifier in the use if it is the case.  */
 114 };
 115
 116 /* Per-ssa version information (induction variable descriptions, etc.).  */
 117 struct version_info
 118 {
 119   tree name;            /* The ssa name.  */
 120   struct iv *iv;        /* Induction variable description.  */
 121   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 122                            an expression that is not an induction variable.  */
 123   unsigned inv_id;      /* Id of an invariant.  */
 124   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 125 };
 126
 127 /* Types of uses.  */
 128 enum use_type
 129 {
 130   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 131   USE_ADDRESS,          /* Use in an address.  */
 132   USE_COMPARE           /* Use is a compare.  */
 133 };
 134
 135 /* The candidate - cost pair.  */
 136 struct cost_pair
 137 {
 138   struct iv_cand *cand; /* The candidate.  */
 139   unsigned cost;        /* The cost.  */
 140   bitmap depends_on;    /* The list of invariants that have to be
 141                            preserved.  */
 142   tree value;           /* For final value elimination, the expression for
 143                            the final value of the iv.  For iv elimination,
 144                            the new bound to compare with.  */
 145 };
 146
 147 /* Use.  */
 148 struct iv_use
 149 {
 150   unsigned id;          /* The id of the use.  */
 151   enum use_type type;   /* Type of the use.  */
 152   struct iv *iv;        /* The induction variable it is based on.  */
 153   tree stmt;            /* Statement in that it occurs.  */
 154   tree *op_p;           /* The place where it occurs.  */
 155   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 156                            important ones.  */
 157
 158   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 159   struct cost_pair *cost_map;
 160                         /* The costs wrto the iv candidates.  */
 161
 162   struct iv_cand *selected;
 163                         /* The selected candidate.  */
 164 };
 165
 166 /* The position where the iv is computed.  */
 167 enum iv_position
 168 {
 169   IP_NORMAL,            /* At the end, just before the exit condition.  */
 170   IP_END,               /* At the end of the latch block.  */
 171   IP_ORIGINAL           /* The original biv.  */
 172 };
 173
 174 /* The induction variable candidate.  */
 175 struct iv_cand
 176 {
 177   unsigned id;          /* The number of the candidate.  */
 178   bool important;       /* Whether this is an "important" candidate, i.e. such
 179                            that it should be considered by all uses.  */
 180   enum iv_position pos; /* Where it is computed.  */
 181   tree incremented_at;  /* For original biv, the statement where it is
 182                            incremented.  */
 183   tree var_before;      /* The variable used for it before increment.  */
 184   tree var_after;       /* The variable used for it after increment.  */
 185   struct iv *iv;        /* The value of the candidate.  NULL for
 186                            "pseudocandidate" used to indicate the possibility
 187                            to replace the final value of an iv by direct
 188                            computation of the value.  */
 189   unsigned cost;        /* Cost of the candidate.  */
 190   bitmap depends_on;    /* The list of invariants that are used in step of the
 191                            biv.  */
 192 };
 193
 194 /* The data used by the induction variable optimizations.  */
 195
 196 typedef struct iv_use *iv_use_p;
 197 DEF_VEC_P(iv_use_p);
 198 DEF_VEC_ALLOC_P(iv_use_p,heap);
 199
 200 typedef struct iv_cand *iv_cand_p;
 201 DEF_VEC_P(iv_cand_p);
 202 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 203
 204 struct ivopts_data
 205 {
 206   /* The currently optimized loop.  */
 207   struct loop *current_loop;
 208
 209   /* Number of registers used in it.  */
 210   unsigned regs_used;
 211
 212   /* Numbers of iterations for all exits of the current loop.  */
 213   struct pointer_map_t *niters;
 214
 215   /* The size of version_info array allocated.  */
 216   unsigned version_info_size;
 217
 218   /* The array of information for the ssa names.  */
 219   struct version_info *version_info;
 220
 221   /* The bitmap of indices in version_info whose value was changed.  */
 222   bitmap relevant;
 223
 224   /* The maximum invariant id.  */
 225   unsigned max_inv_id;
 226
 227   /* The uses of induction variables.  */
 228   VEC(iv_use_p,heap) *iv_uses;
 229
 230   /* The candidates.  */
 231   VEC(iv_cand_p,heap) *iv_candidates;
 232
 233   /* A bitmap of important candidates.  */
 234   bitmap important_candidates;
 235
 236   /* Whether to consider just related and important candidates when replacing a
 237      use.  */
 238   bool consider_all_candidates;
 239 };
 240
 241 /* An assignment of iv candidates to uses.  */
 242
 243 struct iv_ca
 244 {
 245   /* The number of uses covered by the assignment.  */
 246   unsigned upto;
 247
 248   /* Number of uses that cannot be expressed by the candidates in the set.  */
 249   unsigned bad_uses;
 250
 251   /* Candidate assigned to a use, together with the related costs.  */
 252   struct cost_pair **cand_for_use;
 253
 254   /* Number of times each candidate is used.  */
 255   unsigned *n_cand_uses;
 256
 257   /* The candidates used.  */
 258   bitmap cands;
 259
 260   /* The number of candidates in the set.  */
 261   unsigned n_cands;
 262
 263   /* Total number of registers needed.  */
 264   unsigned n_regs;
 265
 266   /* Total cost of expressing uses.  */
 267   unsigned cand_use_cost;
 268
 269   /* Total cost of candidates.  */
 270   unsigned cand_cost;
 271
 272   /* Number of times each invariant is used.  */
 273   unsigned *n_invariant_uses;
 274
 275   /* Total cost of the assignment.  */
 276   unsigned cost;
 277 };
 278
 279 /* Difference of two iv candidate assignments.  */
 280
 281 struct iv_ca_delta
 282 {
 283   /* Changed use.  */
 284   struct iv_use *use;
 285
 286   /* An old assignment (for rollback purposes).  */
 287   struct cost_pair *old_cp;
 288
 289   /* A new assignment.  */
 290   struct cost_pair *new_cp;
 291
 292   /* Next change in the list.  */
 293   struct iv_ca_delta *next_change;
 294 };
 295
 296 /* Bound on number of candidates below that all candidates are considered.  */
 297
 298 #define CONSIDER_ALL_CANDIDATES_BOUND \
 299   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 300
 301 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 302    optimizing such a loop would help, and it would take ages).  */
 303
 304 #define MAX_CONSIDERED_USES \
 305   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 306
 307 /* If there are at most this number of ivs in the set, try removing unnecessary
 308    ivs from the set always.  */
 309
 310 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 311   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 312
 313 /* The list of trees for that the decl_rtl field must be reset is stored
 314    here.  */
 315
 316 static VEC(tree,heap) *decl_rtl_to_reset;
 317
 318 /* Number of uses recorded in DATA.  */
 319
 320 static inline unsigned
 321 n_iv_uses (struct ivopts_data *data)
 322 {
 323   return VEC_length (iv_use_p, data->iv_uses);
 324 }
 325
 326 /* Ith use recorded in DATA.  */
 327
 328 static inline struct iv_use *
 329 iv_use (struct ivopts_data *data, unsigned i)
 330 {
 331   return VEC_index (iv_use_p, data->iv_uses, i);
 332 }
 333
 334 /* Number of candidates recorded in DATA.  */
 335
 336 static inline unsigned
 337 n_iv_cands (struct ivopts_data *data)
 338 {
 339   return VEC_length (iv_cand_p, data->iv_candidates);
 340 }
 341
 342 /* Ith candidate recorded in DATA.  */
 343
 344 static inline struct iv_cand *
 345 iv_cand (struct ivopts_data *data, unsigned i)
 346 {
 347   return VEC_index (iv_cand_p, data->iv_candidates, i);
 348 }
 349
 350 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 351
 352 edge
 353 single_dom_exit (struct loop *loop)
 354 {
 355   edge exit = single_exit (loop);
 356
 357   if (!exit)
 358     return NULL;
 359
 360   if (!just_once_each_iteration_p (loop, exit->src))
 361     return NULL;
 362
 363   return exit;
 364 }
 365
 366 /* Dumps information about the induction variable IV to FILE.  */
 367
 368 extern void dump_iv (FILE *, struct iv *);
 369 void
 370 dump_iv (FILE *file, struct iv *iv)
 371 {
 372   if (iv->ssa_name)
 373     {
 374       fprintf (file, "ssa name ");
 375       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 376       fprintf (file, "\n");
 377     }
 378
 379   fprintf (file, "  type ");
 380   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 381   fprintf (file, "\n");
 382
 383   if (iv->step)
 384     {
 385       fprintf (file, "  base ");
 386       print_generic_expr (file, iv->base, TDF_SLIM);
 387       fprintf (file, "\n");
 388
 389       fprintf (file, "  step ");
 390       print_generic_expr (file, iv->step, TDF_SLIM);
 391       fprintf (file, "\n");
 392     }
 393   else
 394     {
 395       fprintf (file, "  invariant ");
 396       print_generic_expr (file, iv->base, TDF_SLIM);
 397       fprintf (file, "\n");
 398     }
 399
 400   if (iv->base_object)
 401     {
 402       fprintf (file, "  base object ");
 403       print_generic_expr (file, iv->base_object, TDF_SLIM);
 404       fprintf (file, "\n");
 405     }
 406
 407   if (iv->biv_p)
 408     fprintf (file, "  is a biv\n");
 409 }
 410
 411 /* Dumps information about the USE to FILE.  */
 412
 413 extern void dump_use (FILE *, struct iv_use *);
 414 void
 415 dump_use (FILE *file, struct iv_use *use)
 416 {
 417   fprintf (file, "use %d\n", use->id);
 418
 419   switch (use->type)
 420     {
 421     case USE_NONLINEAR_EXPR:
 422       fprintf (file, "  generic\n");
 423       break;
 424
 425     case USE_ADDRESS:
 426       fprintf (file, "  address\n");
 427       break;
 428
 429     case USE_COMPARE:
 430       fprintf (file, "  compare\n");
 431       break;
 432
 433     default:
 434       gcc_unreachable ();
 435     }
 436
 437   fprintf (file, "  in statement ");
 438   print_generic_expr (file, use->stmt, TDF_SLIM);
 439   fprintf (file, "\n");
 440
 441   fprintf (file, "  at position ");
 442   if (use->op_p)
 443     print_generic_expr (file, *use->op_p, TDF_SLIM);
 444   fprintf (file, "\n");
 445
 446   dump_iv (file, use->iv);
 447
 448   if (use->related_cands)
 449     {
 450       fprintf (file, "  related candidates ");
 451       dump_bitmap (file, use->related_cands);
 452     }
 453 }
 454
 455 /* Dumps information about the uses to FILE.  */
 456
 457 extern void dump_uses (FILE *, struct ivopts_data *);
 458 void
 459 dump_uses (FILE *file, struct ivopts_data *data)
 460 {
 461   unsigned i;
 462   struct iv_use *use;
 463
 464   for (i = 0; i < n_iv_uses (data); i++)
 465     {
 466       use = iv_use (data, i);
 467
 468       dump_use (file, use);
 469       fprintf (file, "\n");
 470     }
 471 }
 472
 473 /* Dumps information about induction variable candidate CAND to FILE.  */
 474
 475 extern void dump_cand (FILE *, struct iv_cand *);
 476 void
 477 dump_cand (FILE *file, struct iv_cand *cand)
 478 {
 479   struct iv *iv = cand->iv;
 480
 481   fprintf (file, "candidate %d%s\n",
 482            cand->id, cand->important ? " (important)" : "");
 483
 484   if (cand->depends_on)
 485     {
 486       fprintf (file, "  depends on ");
 487       dump_bitmap (file, cand->depends_on);
 488     }
 489
 490   if (!iv)
 491     {
 492       fprintf (file, "  final value replacement\n");
 493       return;
 494     }
 495
 496   switch (cand->pos)
 497     {
 498     case IP_NORMAL:
 499       fprintf (file, "  incremented before exit test\n");
 500       break;
 501
 502     case IP_END:
 503       fprintf (file, "  incremented at end\n");
 504       break;
 505
 506     case IP_ORIGINAL:
 507       fprintf (file, "  original biv\n");
 508       break;
 509     }
 510
 511   dump_iv (file, iv);
 512 }
 513
 514 /* Returns the info for ssa version VER.  */
 515
 516 static inline struct version_info *
 517 ver_info (struct ivopts_data *data, unsigned ver)
 518 {
 519   return data->version_info + ver;
 520 }
 521
 522 /* Returns the info for ssa name NAME.  */
 523
 524 static inline struct version_info *
 525 name_info (struct ivopts_data *data, tree name)
 526 {
 527   return ver_info (data, SSA_NAME_VERSION (name));
 528 }
 529
 530 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 531    emitted in LOOP.  */
 532
 533 static bool
 534 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 535 {
 536   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 537
 538   gcc_assert (bb);
 539
 540   if (sbb == loop->latch)
 541     return true;
 542
 543   if (sbb != bb)
 544     return false;
 545
 546   return stmt == last_stmt (bb);
 547 }
 548
 549 /* Returns true if STMT if after the place where the original induction
 550    variable CAND is incremented.  */
 551
 552 static bool
 553 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 554 {
 555   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 556   basic_block stmt_bb = bb_for_stmt (stmt);
 557   block_stmt_iterator bsi;
 558
 559   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 560     return false;
 561
 562   if (stmt_bb != cand_bb)
 563     return true;
 564
 565   /* Scan the block from the end, since the original ivs are usually
 566      incremented at the end of the loop body.  */
 567   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 568     {
 569       if (bsi_stmt (bsi) == cand->incremented_at)
 570         return false;
 571       if (bsi_stmt (bsi) == stmt)
 572         return true;
 573     }
 574 }
 575
 576 /* Returns true if STMT if after the place where the induction variable
 577    CAND is incremented in LOOP.  */
 578
 579 static bool
 580 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 581 {
 582   switch (cand->pos)
 583     {
 584     case IP_END:
 585       return false;
 586
 587     case IP_NORMAL:
 588       return stmt_after_ip_normal_pos (loop, stmt);
 589
 590     case IP_ORIGINAL:
 591       return stmt_after_ip_original_pos (cand, stmt);
 592
 593     default:
 594       gcc_unreachable ();
 595     }
 596 }
 597
 598 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 599
 600 static bool
 601 abnormal_ssa_name_p (tree exp)
 602 {
 603   if (!exp)
 604     return false;
 605
 606   if (TREE_CODE (exp) != SSA_NAME)
 607     return false;
 608
 609   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 610 }
 611
 612 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 613    abnormal phi node.  Callback for for_each_index.  */
 614
 615 static bool
 616 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 617                                   void *data ATTRIBUTE_UNUSED)
 618 {
 619   if (TREE_CODE (base) == ARRAY_REF)
 620     {
 621       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 622         return false;
 623       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 624         return false;
 625     }
 626
 627   return !abnormal_ssa_name_p (*index);
 628 }
 629
 630 /* Returns true if EXPR contains a ssa name that occurs in an
 631    abnormal phi node.  */
 632
 633 bool
 634 contains_abnormal_ssa_name_p (tree expr)
 635 {
 636   enum tree_code code;
 637   enum tree_code_class class;
 638
 639   if (!expr)
 640     return false;
 641
 642   code = TREE_CODE (expr);
 643   class = TREE_CODE_CLASS (code);
 644
 645   if (code == SSA_NAME)
 646     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 647
 648   if (code == INTEGER_CST
 649       || is_gimple_min_invariant (expr))
 650     return false;
 651
 652   if (code == ADDR_EXPR)
 653     return !for_each_index (&TREE_OPERAND (expr, 0),
 654                             idx_contains_abnormal_ssa_name_p,
 655                             NULL);
 656
 657   switch (class)
 658     {
 659     case tcc_binary:
 660     case tcc_comparison:
 661       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 662         return true;
 663
 664       /* Fallthru.  */
 665     case tcc_unary:
 666       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 667         return true;
 668
 669       break;
 670
 671     default:
 672       gcc_unreachable ();
 673     }
 674
 675   return false;
 676 }
 677
 678 /*  Returns tree describing number of iterations determined from
 679     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 680
 681 static tree
 682 niter_for_exit (struct ivopts_data *data, edge exit)
 683 {
 684   struct tree_niter_desc desc;
 685   tree niter;
 686   void **slot;
 687
 688   if (!data->niters)
 689     {
 690       data->niters = pointer_map_create ();
 691       slot = NULL;
 692     }
 693   else
 694     slot = pointer_map_contains (data->niters, exit);
 695
 696   if (!slot)
 697     {
 698       /* Try to determine number of iterations.  We must know it
 699          unconditionally (i.e., without possibility of # of iterations
 700          being zero).  Also, we cannot safely work with ssa names that
 701          appear in phi nodes on abnormal edges, so that we do not create
 702          overlapping life ranges for them (PR 27283).  */
 703       if (number_of_iterations_exit (data->current_loop,
 704                                      exit, &desc, true)
 705           && integer_zerop (desc.may_be_zero)
 706           && !contains_abnormal_ssa_name_p (desc.niter))
 707         niter = desc.niter;
 708       else
 709         niter = NULL_TREE;
 710
 711       *pointer_map_insert (data->niters, exit) = niter;
 712     }
 713   else
 714     niter = *slot;
 715
 716   return niter;
 717 }
 718
 719 /* Returns tree describing number of iterations determined from
 720    single dominating exit of DATA->current_loop, or NULL if something
 721    goes wrong.  */
 722
 723 static tree
 724 niter_for_single_dom_exit (struct ivopts_data *data)
 725 {
 726   edge exit = single_dom_exit (data->current_loop);
 727
 728   if (!exit)
 729     return NULL;
 730
 731   return niter_for_exit (data, exit);
 732 }
 733
 734 /* Initializes data structures used by the iv optimization pass, stored
 735    in DATA.  */
 736
 737 static void
 738 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 739 {
 740   data->version_info_size = 2 * num_ssa_names;
 741   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 742   data->relevant = BITMAP_ALLOC (NULL);
 743   data->important_candidates = BITMAP_ALLOC (NULL);
 744   data->max_inv_id = 0;
 745   data->niters = NULL;
 746   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 747   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 748   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 749 }
 750
 751 /* Returns a memory object to that EXPR points.  In case we are able to
 752    determine that it does not point to any such object, NULL is returned.  */
 753
 754 static tree
 755 determine_base_object (tree expr)
 756 {
 757   enum tree_code code = TREE_CODE (expr);
 758   tree base, obj, op0, op1;
 759
 760   /* If this is a pointer casted to any type, we need to determine
 761      the base object for the pointer; so handle conversions before
 762      throwing away non-pointer expressions.  */
 763   if (TREE_CODE (expr) == NOP_EXPR
 764       || TREE_CODE (expr) == CONVERT_EXPR)
 765     return determine_base_object (TREE_OPERAND (expr, 0));
 766
 767   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 768     return NULL_TREE;
 769
 770   switch (code)
 771     {
 772     case INTEGER_CST:
 773       return NULL_TREE;
 774
 775     case ADDR_EXPR:
 776       obj = TREE_OPERAND (expr, 0);
 777       base = get_base_address (obj);
 778
 779       if (!base)
 780         return expr;
 781
 782       if (TREE_CODE (base) == INDIRECT_REF)
 783         return determine_base_object (TREE_OPERAND (base, 0));
 784
 785       return fold_convert (ptr_type_node,
 786                            build_fold_addr_expr (base));
 787
 788     case PLUS_EXPR:
 789     case MINUS_EXPR:
 790       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 791       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 792
 793       if (!op1)
 794         return op0;
 795
 796       if (!op0)
 797         return (code == PLUS_EXPR
 798                 ? op1
 799                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 800
 801       return fold_build2 (code, ptr_type_node, op0, op1);
 802
 803     default:
 804       return fold_convert (ptr_type_node, expr);
 805     }
 806 }
 807
 808 /* Allocates an induction variable with given initial value BASE and step STEP
 809    for loop LOOP.  */
 810
 811 static struct iv *
 812 alloc_iv (tree base, tree step)
 813 {
 814   struct iv *iv = XCNEW (struct iv);
 815   gcc_assert (step != NULL_TREE);
 816
 817   iv->base = base;
 818   iv->base_object = determine_base_object (base);
 819   iv->step = step;
 820   iv->biv_p = false;
 821   iv->have_use_for = false;
 822   iv->use_id = 0;
 823   iv->ssa_name = NULL_TREE;
 824
 825   return iv;
 826 }
 827
 828 /* Sets STEP and BASE for induction variable IV.  */
 829
 830 static void
 831 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 832 {
 833   struct version_info *info = name_info (data, iv);
 834
 835   gcc_assert (!info->iv);
 836
 837   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 838   info->iv = alloc_iv (base, step);
 839   info->iv->ssa_name = iv;
 840 }
 841
 842 /* Finds induction variable declaration for VAR.  */
 843
 844 static struct iv *
 845 get_iv (struct ivopts_data *data, tree var)
 846 {
 847   basic_block bb;
 848   tree type = TREE_TYPE (var);
 849
 850   if (!POINTER_TYPE_P (type)
 851       && !INTEGRAL_TYPE_P (type))
 852     return NULL;
 853
 854   if (!name_info (data, var)->iv)
 855     {
 856       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 857
 858       if (!bb
 859           || !flow_bb_inside_loop_p (data->current_loop, bb))
 860         set_iv (data, var, var, build_int_cst (type, 0));
 861     }
 862
 863   return name_info (data, var)->iv;
 864 }
 865
 866 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 867    not define a simple affine biv with nonzero step.  */
 868
 869 static tree
 870 determine_biv_step (tree phi)
 871 {
 872   struct loop *loop = bb_for_stmt (phi)->loop_father;
 873   tree name = PHI_RESULT (phi);
 874   affine_iv iv;
 875
 876   if (!is_gimple_reg (name))
 877     return NULL_TREE;
 878
 879   if (!simple_iv (loop, phi, name, &iv, true))
 880     return NULL_TREE;
 881
 882   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 883 }
 884
 885 /* Finds basic ivs.  */
 886
 887 static bool
 888 find_bivs (struct ivopts_data *data)
 889 {
 890   tree phi, step, type, base;
 891   bool found = false;
 892   struct loop *loop = data->current_loop;
 893
 894   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 895     {
 896       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 897         continue;
 898
 899       step = determine_biv_step (phi);
 900       if (!step)
 901         continue;
 902
 903       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 904       base = expand_simple_operations (base);
 905       if (contains_abnormal_ssa_name_p (base)
 906           || contains_abnormal_ssa_name_p (step))
 907         continue;
 908
 909       type = TREE_TYPE (PHI_RESULT (phi));
 910       base = fold_convert (type, base);
 911       if (step)
 912         step = fold_convert (type, step);
 913
 914       set_iv (data, PHI_RESULT (phi), base, step);
 915       found = true;
 916     }
 917
 918   return found;
 919 }
 920
 921 /* Marks basic ivs.  */
 922
 923 static void
 924 mark_bivs (struct ivopts_data *data)
 925 {
 926   tree phi, var;
 927   struct iv *iv, *incr_iv;
 928   struct loop *loop = data->current_loop;
 929   basic_block incr_bb;
 930
 931   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 932     {
 933       iv = get_iv (data, PHI_RESULT (phi));
 934       if (!iv)
 935         continue;
 936
 937       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 938       incr_iv = get_iv (data, var);
 939       if (!incr_iv)
 940         continue;
 941
 942       /* If the increment is in the subloop, ignore it.  */
 943       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 944       if (incr_bb->loop_father != data->current_loop
 945           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 946         continue;
 947
 948       iv->biv_p = true;
 949       incr_iv->biv_p = true;
 950     }
 951 }
 952
 953 /* Checks whether STMT defines a linear induction variable and stores its
 954    parameters to IV.  */
 955
 956 static bool
 957 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 958 {
 959   tree lhs;
 960   struct loop *loop = data->current_loop;
 961
 962   iv->base = NULL_TREE;
 963   iv->step = NULL_TREE;
 964
 965   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 966     return false;
 967
 968   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 969   if (TREE_CODE (lhs) != SSA_NAME)
 970     return false;
 971
 972   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
 973     return false;
 974   iv->base = expand_simple_operations (iv->base);
 975
 976   if (contains_abnormal_ssa_name_p (iv->base)
 977       || contains_abnormal_ssa_name_p (iv->step))
 978     return false;
 979
 980   return true;
 981 }
 982
 983 /* Finds general ivs in statement STMT.  */
 984
 985 static void
 986 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
 987 {
 988   affine_iv iv;
 989
 990   if (!find_givs_in_stmt_scev (data, stmt, &iv))
 991     return;
 992
 993   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
 994 }
 995
 996 /* Finds general ivs in basic block BB.  */
 997
 998 static void
 999 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1000 {
1001   block_stmt_iterator bsi;
1002
1003   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1004     find_givs_in_stmt (data, bsi_stmt (bsi));
1005 }
1006
1007 /* Finds general ivs.  */
1008
1009 static void
1010 find_givs (struct ivopts_data *data)
1011 {
1012   struct loop *loop = data->current_loop;
1013   basic_block *body = get_loop_body_in_dom_order (loop);
1014   unsigned i;
1015
1016   for (i = 0; i < loop->num_nodes; i++)
1017     find_givs_in_bb (data, body[i]);
1018   free (body);
1019 }
1020
1021 /* For each ssa name defined in LOOP determines whether it is an induction
1022    variable and if so, its initial value and step.  */
1023
1024 static bool
1025 find_induction_variables (struct ivopts_data *data)
1026 {
1027   unsigned i;
1028   bitmap_iterator bi;
1029
1030   if (!find_bivs (data))
1031     return false;
1032
1033   find_givs (data);
1034   mark_bivs (data);
1035
1036   if (dump_file && (dump_flags & TDF_DETAILS))
1037     {
1038       tree niter = niter_for_single_dom_exit (data);
1039
1040       if (niter)
1041         {
1042           fprintf (dump_file, "  number of iterations ");
1043           print_generic_expr (dump_file, niter, TDF_SLIM);
1044           fprintf (dump_file, "\n\n");
1045         };
1046
1047       fprintf (dump_file, "Induction variables:\n\n");
1048
1049       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1050         {
1051           if (ver_info (data, i)->iv)
1052             dump_iv (dump_file, ver_info (data, i)->iv);
1053         }
1054     }
1055
1056   return true;
1057 }
1058
1059 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1060
1061 static struct iv_use *
1062 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1063             tree stmt, enum use_type use_type)
1064 {
1065   struct iv_use *use = XCNEW (struct iv_use);
1066
1067   use->id = n_iv_uses (data);
1068   use->type = use_type;
1069   use->iv = iv;
1070   use->stmt = stmt;
1071   use->op_p = use_p;
1072   use->related_cands = BITMAP_ALLOC (NULL);
1073
1074   /* To avoid showing ssa name in the dumps, if it was not reset by the
1075      caller.  */
1076   iv->ssa_name = NULL_TREE;
1077
1078   if (dump_file && (dump_flags & TDF_DETAILS))
1079     dump_use (dump_file, use);
1080
1081   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1082
1083   return use;
1084 }
1085
1086 /* Checks whether OP is a loop-level invariant and if so, records it.
1087    NONLINEAR_USE is true if the invariant is used in a way we do not
1088    handle specially.  */
1089
1090 static void
1091 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1092 {
1093   basic_block bb;
1094   struct version_info *info;
1095
1096   if (TREE_CODE (op) != SSA_NAME
1097       || !is_gimple_reg (op))
1098     return;
1099
1100   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1101   if (bb
1102       && flow_bb_inside_loop_p (data->current_loop, bb))
1103     return;
1104
1105   info = name_info (data, op);
1106   info->name = op;
1107   info->has_nonlin_use |= nonlinear_use;
1108   if (!info->inv_id)
1109     info->inv_id = ++data->max_inv_id;
1110   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1111 }
1112
1113 /* Checks whether the use OP is interesting and if so, records it.  */
1114
1115 static struct iv_use *
1116 find_interesting_uses_op (struct ivopts_data *data, tree op)
1117 {
1118   struct iv *iv;
1119   struct iv *civ;
1120   tree stmt;
1121   struct iv_use *use;
1122
1123   if (TREE_CODE (op) != SSA_NAME)
1124     return NULL;
1125
1126   iv = get_iv (data, op);
1127   if (!iv)
1128     return NULL;
1129
1130   if (iv->have_use_for)
1131     {
1132       use = iv_use (data, iv->use_id);
1133
1134       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1135       return use;
1136     }
1137
1138   if (integer_zerop (iv->step))
1139     {
1140       record_invariant (data, op, true);
1141       return NULL;
1142     }
1143   iv->have_use_for = true;
1144
1145   civ = XNEW (struct iv);
1146   *civ = *iv;
1147
1148   stmt = SSA_NAME_DEF_STMT (op);
1149   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1150               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1151
1152   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1153   iv->use_id = use->id;
1154
1155   return use;
1156 }
1157
1158 /* Given a condition *COND_P, checks whether it is a compare of an induction
1159    variable and an invariant.  If this is the case, CONTROL_VAR is set
1160    to location of the iv, BOUND to the location of the invariant,
1161    IV_VAR and IV_BOUND are set to the corresponding induction variable
1162    descriptions, and true is returned.  If this is not the case,
1163    CONTROL_VAR and BOUND are set to the arguments of the condition and
1164    false is returned.  */
1165
1166 static bool
1167 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1168                        tree **control_var, tree **bound,
1169                        struct iv **iv_var, struct iv **iv_bound)
1170 {
1171   /* The nodes returned when COND has just one operand.  Note that you should
1172      not modify anything in BOUND or IV_BOUND because of this.  */
1173   static struct iv const_iv;
1174   static tree zero;
1175   tree cond = *cond_p;
1176   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1177   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1178   bool ret = false;
1179
1180   zero = integer_zero_node;
1181   const_iv.step = integer_zero_node;
1182
1183   if (TREE_CODE (cond) == SSA_NAME)
1184     {
1185       op0 = cond_p;
1186       iv0 = get_iv (data, cond);
1187       ret = (iv0 && !integer_zerop (iv0->step));
1188       goto end;
1189     }
1190
1191   if (!COMPARISON_CLASS_P (cond))
1192     {
1193       op0 = cond_p;
1194       goto end;
1195     }
1196
1197   op0 = &TREE_OPERAND (cond, 0);
1198   op1 = &TREE_OPERAND (cond, 1);
1199   if (TREE_CODE (*op0) == SSA_NAME)
1200     iv0 = get_iv (data, *op0);
1201   if (TREE_CODE (*op1) == SSA_NAME)
1202     iv1 = get_iv (data, *op1);
1203
1204   /* Exactly one of the compared values must be an iv, and the other one must
1205      be an invariant.  */
1206   if (!iv0 || !iv1)
1207     goto end;
1208
1209   if (integer_zerop (iv0->step))
1210     {
1211       /* Control variable may be on the other side.  */
1212       tmp_op = op0; op0 = op1; op1 = tmp_op;
1213       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1214     }
1215   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1216
1217 end:
1218   if (control_var)
1219     *control_var = op0;;
1220   if (iv_var)
1221     *iv_var = iv0;;
1222   if (bound)
1223     *bound = op1;
1224   if (iv_bound)
1225     *iv_bound = iv1;
1226
1227   return ret;
1228 }
1229
1230 /* Checks whether the condition *COND_P in STMT is interesting
1231    and if so, records it.  */
1232
1233 static void
1234 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1235 {
1236   tree *var_p, *bound_p;
1237   struct iv *var_iv, *civ;
1238
1239   if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1240     {
1241       find_interesting_uses_op (data, *var_p);
1242       find_interesting_uses_op (data, *bound_p);
1243       return;
1244     }
1245
1246   civ = XNEW (struct iv);
1247   *civ = *var_iv;
1248   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1249 }
1250
1251 /* Returns true if expression EXPR is obviously invariant in LOOP,
1252    i.e. if all its operands are defined outside of the LOOP.  */
1253
1254 bool
1255 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1256 {
1257   basic_block def_bb;
1258   unsigned i, len;
1259
1260   if (is_gimple_min_invariant (expr))
1261     return true;
1262
1263   if (TREE_CODE (expr) == SSA_NAME)
1264     {
1265       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1266       if (def_bb
1267           && flow_bb_inside_loop_p (loop, def_bb))
1268         return false;
1269
1270       return true;
1271     }
1272
1273   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1274     return false;
1275
1276   len = TREE_OPERAND_LENGTH (expr);
1277   for (i = 0; i < len; i++)
1278     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1279       return false;
1280
1281   return true;
1282 }
1283
1284 /* Cumulates the steps of indices into DATA and replaces their values with the
1285    initial ones.  Returns false when the value of the index cannot be determined.
1286    Callback for for_each_index.  */
1287
1288 struct ifs_ivopts_data
1289 {
1290   struct ivopts_data *ivopts_data;
1291   tree stmt;
1292   tree step;
1293 };
1294
1295 static bool
1296 idx_find_step (tree base, tree *idx, void *data)
1297 {
1298   struct ifs_ivopts_data *dta = data;
1299   struct iv *iv;
1300   tree step, iv_base, iv_step, lbound, off;
1301   struct loop *loop = dta->ivopts_data->current_loop;
1302
1303   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1304       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1305     return false;
1306
1307   /* If base is a component ref, require that the offset of the reference
1308      be invariant.  */
1309   if (TREE_CODE (base) == COMPONENT_REF)
1310     {
1311       off = component_ref_field_offset (base);
1312       return expr_invariant_in_loop_p (loop, off);
1313     }
1314
1315   /* If base is array, first check whether we will be able to move the
1316      reference out of the loop (in order to take its address in strength
1317      reduction).  In order for this to work we need both lower bound
1318      and step to be loop invariants.  */
1319   if (TREE_CODE (base) == ARRAY_REF)
1320     {
1321       step = array_ref_element_size (base);
1322       lbound = array_ref_low_bound (base);
1323
1324       if (!expr_invariant_in_loop_p (loop, step)
1325           || !expr_invariant_in_loop_p (loop, lbound))
1326         return false;
1327     }
1328
1329   if (TREE_CODE (*idx) != SSA_NAME)
1330     return true;
1331
1332   iv = get_iv (dta->ivopts_data, *idx);
1333   if (!iv)
1334     return false;
1335
1336   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1337           *&x[0], which is not folded and does not trigger the
1338           ARRAY_REF path below.  */
1339   *idx = iv->base;
1340
1341   if (integer_zerop (iv->step))
1342     return true;
1343
1344   if (TREE_CODE (base) == ARRAY_REF)
1345     {
1346       step = array_ref_element_size (base);
1347
1348       /* We only handle addresses whose step is an integer constant.  */
1349       if (TREE_CODE (step) != INTEGER_CST)
1350         return false;
1351     }
1352   else
1353     /* The step for pointer arithmetics already is 1 byte.  */
1354     step = build_int_cst (sizetype, 1);
1355
1356   iv_base = iv->base;
1357   iv_step = iv->step;
1358   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1359                             sizetype, &iv_base, &iv_step, dta->stmt,
1360                             false))
1361     {
1362       /* The index might wrap.  */
1363       return false;
1364     }
1365
1366   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1367   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1368
1369   return true;
1370 }
1371
1372 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1373    object is passed to it in DATA.  */
1374
1375 static bool
1376 idx_record_use (tree base, tree *idx,
1377                 void *data)
1378 {
1379   find_interesting_uses_op (data, *idx);
1380   if (TREE_CODE (base) == ARRAY_REF)
1381     {
1382       find_interesting_uses_op (data, array_ref_element_size (base));
1383       find_interesting_uses_op (data, array_ref_low_bound (base));
1384     }
1385   return true;
1386 }
1387
1388 /* Returns true if memory reference REF may be unaligned.  */
1389
1390 static bool
1391 may_be_unaligned_p (tree ref)
1392 {
1393   tree base;
1394   tree base_type;
1395   HOST_WIDE_INT bitsize;
1396   HOST_WIDE_INT bitpos;
1397   tree toffset;
1398   enum machine_mode mode;
1399   int unsignedp, volatilep;
1400   unsigned base_align;
1401
1402   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1403      thus they are not misaligned.  */
1404   if (TREE_CODE (ref) == TARGET_MEM_REF)
1405     return false;
1406
1407   /* The test below is basically copy of what expr.c:normal_inner_ref
1408      does to check whether the object must be loaded by parts when
1409      STRICT_ALIGNMENT is true.  */
1410   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1411                               &unsignedp, &volatilep, true);
1412   base_type = TREE_TYPE (base);
1413   base_align = TYPE_ALIGN (base_type);
1414
1415   if (mode != BLKmode
1416       && (base_align < GET_MODE_ALIGNMENT (mode)
1417           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1418           || bitpos % BITS_PER_UNIT != 0))
1419     return true;
1420
1421   return false;
1422 }
1423
1424 /* Return true if EXPR may be non-addressable.   */
1425
1426 static bool
1427 may_be_nonaddressable_p (tree expr)
1428 {
1429   switch (TREE_CODE (expr))
1430     {
1431     case COMPONENT_REF:
1432       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1433              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1434
1435     case ARRAY_REF:
1436     case ARRAY_RANGE_REF:
1437       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1438
1439     case VIEW_CONVERT_EXPR:
1440       /* This kind of view-conversions may wrap non-addressable objects
1441          and make them look addressable.  After some processing the
1442          non-addressability may be uncovered again, causing ADDR_EXPRs
1443          of inappropriate objects to be built.  */
1444       return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1445              && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1446
1447     default:
1448       break;
1449     }
1450
1451   return false;
1452 }
1453
1454 /* Finds addresses in *OP_P inside STMT.  */
1455
1456 static void
1457 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1458 {
1459   tree base = *op_p, step = build_int_cst (sizetype, 0);
1460   struct iv *civ;
1461   struct ifs_ivopts_data ifs_ivopts_data;
1462
1463   /* Do not play with volatile memory references.  A bit too conservative,
1464      perhaps, but safe.  */
1465   if (stmt_ann (stmt)->has_volatile_ops)
1466     goto fail;
1467
1468   /* Ignore bitfields for now.  Not really something terribly complicated
1469      to handle.  TODO.  */
1470   if (TREE_CODE (base) == BIT_FIELD_REF)
1471     goto fail;
1472
1473   if (may_be_nonaddressable_p (base))
1474     goto fail;
1475
1476   if (STRICT_ALIGNMENT
1477       && may_be_unaligned_p (base))
1478     goto fail;
1479
1480   base = unshare_expr (base);
1481
1482   if (TREE_CODE (base) == TARGET_MEM_REF)
1483     {
1484       tree type = build_pointer_type (TREE_TYPE (base));
1485       tree astep;
1486
1487       if (TMR_BASE (base)
1488           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1489         {
1490           civ = get_iv (data, TMR_BASE (base));
1491           if (!civ)
1492             goto fail;
1493
1494           TMR_BASE (base) = civ->base;
1495           step = civ->step;
1496         }
1497       if (TMR_INDEX (base)
1498           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1499         {
1500           civ = get_iv (data, TMR_INDEX (base));
1501           if (!civ)
1502             goto fail;
1503
1504           TMR_INDEX (base) = civ->base;
1505           astep = civ->step;
1506
1507           if (astep)
1508             {
1509               if (TMR_STEP (base))
1510                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1511
1512               step = fold_build2 (PLUS_EXPR, type, step, astep);
1513             }
1514         }
1515
1516       if (integer_zerop (step))
1517         goto fail;
1518       base = tree_mem_ref_addr (type, base);
1519     }
1520   else
1521     {
1522       ifs_ivopts_data.ivopts_data = data;
1523       ifs_ivopts_data.stmt = stmt;
1524       ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1525       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1526           || integer_zerop (ifs_ivopts_data.step))
1527         goto fail;
1528       step = ifs_ivopts_data.step;
1529
1530       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1531       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1532
1533       base = build_fold_addr_expr (base);
1534
1535       /* Substituting bases of IVs into the base expression might
1536          have caused folding opportunities.  */
1537       if (TREE_CODE (base) == ADDR_EXPR)
1538         {
1539           tree *ref = &TREE_OPERAND (base, 0);
1540           while (handled_component_p (*ref))
1541             ref = &TREE_OPERAND (*ref, 0);
1542           if (TREE_CODE (*ref) == INDIRECT_REF)
1543             *ref = fold_indirect_ref (*ref);
1544         }
1545     }
1546
1547   civ = alloc_iv (base, step);
1548   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1549   return;
1550
1551 fail:
1552   for_each_index (op_p, idx_record_use, data);
1553 }
1554
1555 /* Finds and records invariants used in STMT.  */
1556
1557 static void
1558 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1559 {
1560   ssa_op_iter iter;
1561   use_operand_p use_p;
1562   tree op;
1563
1564   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1565     {
1566       op = USE_FROM_PTR (use_p);
1567       record_invariant (data, op, false);
1568     }
1569 }
1570
1571 /* Finds interesting uses of induction variables in the statement STMT.  */
1572
1573 static void
1574 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1575 {
1576   struct iv *iv;
1577   tree op, lhs, rhs;
1578   ssa_op_iter iter;
1579   use_operand_p use_p;
1580
1581   find_invariants_stmt (data, stmt);
1582
1583   if (TREE_CODE (stmt) == COND_EXPR)
1584     {
1585       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1586       return;
1587     }
1588
1589   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1590     {
1591       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1592       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1593
1594       if (TREE_CODE (lhs) == SSA_NAME)
1595         {
1596           /* If the statement defines an induction variable, the uses are not
1597              interesting by themselves.  */
1598
1599           iv = get_iv (data, lhs);
1600
1601           if (iv && !integer_zerop (iv->step))
1602             return;
1603         }
1604
1605       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1606         {
1607         case tcc_comparison:
1608           find_interesting_uses_cond (data, stmt,
1609                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1610           return;
1611
1612         case tcc_reference:
1613           find_interesting_uses_address (data, stmt,
1614                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1615           if (REFERENCE_CLASS_P (lhs))
1616             find_interesting_uses_address (data, stmt,
1617                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1618           return;
1619
1620         default: ;
1621         }
1622
1623       if (REFERENCE_CLASS_P (lhs)
1624           && is_gimple_val (rhs))
1625         {
1626           find_interesting_uses_address (data, stmt,
1627                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1628           find_interesting_uses_op (data, rhs);
1629           return;
1630         }
1631
1632       /* TODO -- we should also handle address uses of type
1633
1634          memory = call (whatever);
1635
1636          and
1637
1638          call (memory).  */
1639     }
1640
1641   if (TREE_CODE (stmt) == PHI_NODE
1642       && bb_for_stmt (stmt) == data->current_loop->header)
1643     {
1644       lhs = PHI_RESULT (stmt);
1645       iv = get_iv (data, lhs);
1646
1647       if (iv && !integer_zerop (iv->step))
1648         return;
1649     }
1650
1651   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1652     {
1653       op = USE_FROM_PTR (use_p);
1654
1655       if (TREE_CODE (op) != SSA_NAME)
1656         continue;
1657
1658       iv = get_iv (data, op);
1659       if (!iv)
1660         continue;
1661
1662       find_interesting_uses_op (data, op);
1663     }
1664 }
1665
1666 /* Finds interesting uses of induction variables outside of loops
1667    on loop exit edge EXIT.  */
1668
1669 static void
1670 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1671 {
1672   tree phi, def;
1673
1674   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1675     {
1676       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1677       if (is_gimple_reg (def))
1678         find_interesting_uses_op (data, def);
1679     }
1680 }
1681
1682 /* Finds uses of the induction variables that are interesting.  */
1683
1684 static void
1685 find_interesting_uses (struct ivopts_data *data)
1686 {
1687   basic_block bb;
1688   block_stmt_iterator bsi;
1689   tree phi;
1690   basic_block *body = get_loop_body (data->current_loop);
1691   unsigned i;
1692   struct version_info *info;
1693   edge e;
1694
1695   if (dump_file && (dump_flags & TDF_DETAILS))
1696     fprintf (dump_file, "Uses:\n\n");
1697
1698   for (i = 0; i < data->current_loop->num_nodes; i++)
1699     {
1700       edge_iterator ei;
1701       bb = body[i];
1702
1703       FOR_EACH_EDGE (e, ei, bb->succs)
1704         if (e->dest != EXIT_BLOCK_PTR
1705             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1706           find_interesting_uses_outside (data, e);
1707
1708       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1709         find_interesting_uses_stmt (data, phi);
1710       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1711         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1712     }
1713
1714   if (dump_file && (dump_flags & TDF_DETAILS))
1715     {
1716       bitmap_iterator bi;
1717
1718       fprintf (dump_file, "\n");
1719
1720       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1721         {
1722           info = ver_info (data, i);
1723           if (info->inv_id)
1724             {
1725               fprintf (dump_file, "  ");
1726               print_generic_expr (dump_file, info->name, TDF_SLIM);
1727               fprintf (dump_file, " is invariant (%d)%s\n",
1728                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1729             }
1730         }
1731
1732       fprintf (dump_file, "\n");
1733     }
1734
1735   free (body);
1736 }
1737
1738 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1739    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1740    we are at the top-level of the processed address.  */
1741
1742 static tree
1743 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1744                 unsigned HOST_WIDE_INT *offset)
1745 {
1746   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1747   enum tree_code code;
1748   tree type, orig_type = TREE_TYPE (expr);
1749   unsigned HOST_WIDE_INT off0, off1, st;
1750   tree orig_expr = expr;
1751
1752   STRIP_NOPS (expr);
1753
1754   type = TREE_TYPE (expr);
1755   code = TREE_CODE (expr);
1756   *offset = 0;
1757
1758   switch (code)
1759     {
1760     case INTEGER_CST:
1761       if (!cst_and_fits_in_hwi (expr)
1762           || integer_zerop (expr))
1763         return orig_expr;
1764
1765       *offset = int_cst_value (expr);
1766       return build_int_cst (orig_type, 0);
1767
1768     case PLUS_EXPR:
1769     case MINUS_EXPR:
1770       op0 = TREE_OPERAND (expr, 0);
1771       op1 = TREE_OPERAND (expr, 1);
1772
1773       op0 = strip_offset_1 (op0, false, false, &off0);
1774       op1 = strip_offset_1 (op1, false, false, &off1);
1775
1776       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1777       if (op0 == TREE_OPERAND (expr, 0)
1778           && op1 == TREE_OPERAND (expr, 1))
1779         return orig_expr;
1780
1781       if (integer_zerop (op1))
1782         expr = op0;
1783       else if (integer_zerop (op0))
1784         {
1785           if (code == PLUS_EXPR)
1786             expr = op1;
1787           else
1788             expr = fold_build1 (NEGATE_EXPR, type, op1);
1789         }
1790       else
1791         expr = fold_build2 (code, type, op0, op1);
1792
1793       return fold_convert (orig_type, expr);
1794
1795     case ARRAY_REF:
1796       if (!inside_addr)
1797         return orig_expr;
1798
1799       step = array_ref_element_size (expr);
1800       if (!cst_and_fits_in_hwi (step))
1801         break;
1802
1803       st = int_cst_value (step);
1804       op1 = TREE_OPERAND (expr, 1);
1805       op1 = strip_offset_1 (op1, false, false, &off1);
1806       *offset = off1 * st;
1807
1808       if (top_compref
1809           && integer_zerop (op1))
1810         {
1811           /* Strip the component reference completely.  */
1812           op0 = TREE_OPERAND (expr, 0);
1813           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1814           *offset += off0;
1815           return op0;
1816         }
1817       break;
1818
1819     case COMPONENT_REF:
1820       if (!inside_addr)
1821         return orig_expr;
1822
1823       tmp = component_ref_field_offset (expr);
1824       if (top_compref
1825           && cst_and_fits_in_hwi (tmp))
1826         {
1827           /* Strip the component reference completely.  */
1828           op0 = TREE_OPERAND (expr, 0);
1829           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1830           *offset = off0 + int_cst_value (tmp);
1831           return op0;
1832         }
1833       break;
1834
1835     case ADDR_EXPR:
1836       op0 = TREE_OPERAND (expr, 0);
1837       op0 = strip_offset_1 (op0, true, true, &off0);
1838       *offset += off0;
1839
1840       if (op0 == TREE_OPERAND (expr, 0))
1841         return orig_expr;
1842
1843       expr = build_fold_addr_expr (op0);
1844       return fold_convert (orig_type, expr);
1845
1846     case INDIRECT_REF:
1847       inside_addr = false;
1848       break;
1849
1850     default:
1851       return orig_expr;
1852     }
1853
1854   /* Default handling of expressions for that we want to recurse into
1855      the first operand.  */
1856   op0 = TREE_OPERAND (expr, 0);
1857   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1858   *offset += off0;
1859
1860   if (op0 == TREE_OPERAND (expr, 0)
1861       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1862     return orig_expr;
1863
1864   expr = copy_node (expr);
1865   TREE_OPERAND (expr, 0) = op0;
1866   if (op1)
1867     TREE_OPERAND (expr, 1) = op1;
1868
1869   /* Inside address, we might strip the top level component references,
1870      thus changing type of the expression.  Handling of ADDR_EXPR
1871      will fix that.  */
1872   expr = fold_convert (orig_type, expr);
1873
1874   return expr;
1875 }
1876
1877 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1878
1879 static tree
1880 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1881 {
1882   return strip_offset_1 (expr, false, false, offset);
1883 }
1884
1885 /* Returns variant of TYPE that can be used as base for different uses.
1886    We return unsigned type with the same precision, which avoids problems
1887    with overflows.  */
1888
1889 static tree
1890 generic_type_for (tree type)
1891 {
1892   if (POINTER_TYPE_P (type))
1893     return unsigned_type_for (type);
1894
1895   if (TYPE_UNSIGNED (type))
1896     return type;
1897
1898   return unsigned_type_for (type);
1899 }
1900
1901 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1902    the bitmap to that we should store it.  */
1903
1904 static struct ivopts_data *fd_ivopts_data;
1905 static tree
1906 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1907 {
1908   bitmap *depends_on = data;
1909   struct version_info *info;
1910
1911   if (TREE_CODE (*expr_p) != SSA_NAME)
1912     return NULL_TREE;
1913   info = name_info (fd_ivopts_data, *expr_p);
1914
1915   if (!info->inv_id || info->has_nonlin_use)
1916     return NULL_TREE;
1917
1918   if (!*depends_on)
1919     *depends_on = BITMAP_ALLOC (NULL);
1920   bitmap_set_bit (*depends_on, info->inv_id);
1921
1922   return NULL_TREE;
1923 }
1924
1925 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1926    position to POS.  If USE is not NULL, the candidate is set as related to
1927    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1928    replacement of the final value of the iv by a direct computation.  */
1929
1930 static struct iv_cand *
1931 add_candidate_1 (struct ivopts_data *data,
1932                  tree base, tree step, bool important, enum iv_position pos,
1933                  struct iv_use *use, tree incremented_at)
1934 {
1935   unsigned i;
1936   struct iv_cand *cand = NULL;
1937   tree type, orig_type;
1938
1939   if (base)
1940     {
1941       orig_type = TREE_TYPE (base);
1942       type = generic_type_for (orig_type);
1943       if (type != orig_type)
1944         {
1945           base = fold_convert (type, base);
1946           step = fold_convert (type, step);
1947         }
1948     }
1949
1950   for (i = 0; i < n_iv_cands (data); i++)
1951     {
1952       cand = iv_cand (data, i);
1953
1954       if (cand->pos != pos)
1955         continue;
1956
1957       if (cand->incremented_at != incremented_at)
1958         continue;
1959
1960       if (!cand->iv)
1961         {
1962           if (!base && !step)
1963             break;
1964
1965           continue;
1966         }
1967
1968       if (!base && !step)
1969         continue;
1970
1971       if (operand_equal_p (base, cand->iv->base, 0)
1972           && operand_equal_p (step, cand->iv->step, 0))
1973         break;
1974     }
1975
1976   if (i == n_iv_cands (data))
1977     {
1978       cand = XCNEW (struct iv_cand);
1979       cand->id = i;
1980
1981       if (!base && !step)
1982         cand->iv = NULL;
1983       else
1984         cand->iv = alloc_iv (base, step);
1985
1986       cand->pos = pos;
1987       if (pos != IP_ORIGINAL && cand->iv)
1988         {
1989           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1990           cand->var_after = cand->var_before;
1991         }
1992       cand->important = important;
1993       cand->incremented_at = incremented_at;
1994       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
1995
1996       if (step
1997           && TREE_CODE (step) != INTEGER_CST)
1998         {
1999           fd_ivopts_data = data;
2000           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2001         }
2002
2003       if (dump_file && (dump_flags & TDF_DETAILS))
2004         dump_cand (dump_file, cand);
2005     }
2006
2007   if (important && !cand->important)
2008     {
2009       cand->important = true;
2010       if (dump_file && (dump_flags & TDF_DETAILS))
2011         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2012     }
2013
2014   if (use)
2015     {
2016       bitmap_set_bit (use->related_cands, i);
2017       if (dump_file && (dump_flags & TDF_DETAILS))
2018         fprintf (dump_file, "Candidate %d is related to use %d\n",
2019                  cand->id, use->id);
2020     }
2021
2022   return cand;
2023 }
2024
2025 /* Returns true if incrementing the induction variable at the end of the LOOP
2026    is allowed.
2027
2028    The purpose is to avoid splitting latch edge with a biv increment, thus
2029    creating a jump, possibly confusing other optimization passes and leaving
2030    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2031    is not available (so we do not have a better alternative), or if the latch
2032    edge is already nonempty.  */
2033
2034 static bool
2035 allow_ip_end_pos_p (struct loop *loop)
2036 {
2037   if (!ip_normal_pos (loop))
2038     return true;
2039
2040   if (!empty_block_p (ip_end_pos (loop)))
2041     return true;
2042
2043   return false;
2044 }
2045
2046 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2047    position to POS.  If USE is not NULL, the candidate is set as related to
2048    it.  The candidate computation is scheduled on all available positions.  */
2049
2050 static void
2051 add_candidate (struct ivopts_data *data,
2052                tree base, tree step, bool important, struct iv_use *use)
2053 {
2054   if (ip_normal_pos (data->current_loop))
2055     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2056   if (ip_end_pos (data->current_loop)
2057       && allow_ip_end_pos_p (data->current_loop))
2058     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2059 }
2060
2061 /* Add a standard "0 + 1 * iteration" iv candidate for a
2062    type with SIZE bits.  */
2063
2064 static void
2065 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2066                                      unsigned int size)
2067 {
2068   tree type = lang_hooks.types.type_for_size (size, true);
2069   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2070                  true, NULL);
2071 }
2072
2073 /* Adds standard iv candidates.  */
2074
2075 static void
2076 add_standard_iv_candidates (struct ivopts_data *data)
2077 {
2078   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2079
2080   /* The same for a double-integer type if it is still fast enough.  */
2081   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2082     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2083 }
2084
2085
2086 /* Adds candidates bases on the old induction variable IV.  */
2087
2088 static void
2089 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2090 {
2091   tree phi, def;
2092   struct iv_cand *cand;
2093
2094   add_candidate (data, iv->base, iv->step, true, NULL);
2095
2096   /* The same, but with initial value zero.  */
2097   add_candidate (data,
2098                  build_int_cst (TREE_TYPE (iv->base), 0),
2099                  iv->step, true, NULL);
2100
2101   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2102   if (TREE_CODE (phi) == PHI_NODE)
2103     {
2104       /* Additionally record the possibility of leaving the original iv
2105          untouched.  */
2106       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2107       cand = add_candidate_1 (data,
2108                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2109                               SSA_NAME_DEF_STMT (def));
2110       cand->var_before = iv->ssa_name;
2111       cand->var_after = def;
2112     }
2113 }
2114
2115 /* Adds candidates based on the old induction variables.  */
2116
2117 static void
2118 add_old_ivs_candidates (struct ivopts_data *data)
2119 {
2120   unsigned i;
2121   struct iv *iv;
2122   bitmap_iterator bi;
2123
2124   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2125     {
2126       iv = ver_info (data, i)->iv;
2127       if (iv && iv->biv_p && !integer_zerop (iv->step))
2128         add_old_iv_candidates (data, iv);
2129     }
2130 }
2131
2132 /* Adds candidates based on the value of the induction variable IV and USE.  */
2133
2134 static void
2135 add_iv_value_candidates (struct ivopts_data *data,
2136                          struct iv *iv, struct iv_use *use)
2137 {
2138   unsigned HOST_WIDE_INT offset;
2139   tree base;
2140
2141   add_candidate (data, iv->base, iv->step, false, use);
2142
2143   /* The same, but with initial value zero.  Make such variable important,
2144      since it is generic enough so that possibly many uses may be based
2145      on it.  */
2146   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2147                  iv->step, true, use);
2148
2149   /* Third, try removing the constant offset.  */
2150   base = strip_offset (iv->base, &offset);
2151   if (offset)
2152     add_candidate (data, base, iv->step, false, use);
2153 }
2154
2155 /* Adds candidates based on the uses.  */
2156
2157 static void
2158 add_derived_ivs_candidates (struct ivopts_data *data)
2159 {
2160   unsigned i;
2161
2162   for (i = 0; i < n_iv_uses (data); i++)
2163     {
2164       struct iv_use *use = iv_use (data, i);
2165
2166       if (!use)
2167         continue;
2168
2169       switch (use->type)
2170         {
2171         case USE_NONLINEAR_EXPR:
2172         case USE_COMPARE:
2173         case USE_ADDRESS:
2174           /* Just add the ivs based on the value of the iv used here.  */
2175           add_iv_value_candidates (data, use->iv, use);
2176           break;
2177
2178         default:
2179           gcc_unreachable ();
2180         }
2181     }
2182 }
2183
2184 /* Record important candidates and add them to related_cands bitmaps
2185    if needed.  */
2186
2187 static void
2188 record_important_candidates (struct ivopts_data *data)
2189 {
2190   unsigned i;
2191   struct iv_use *use;
2192
2193   for (i = 0; i < n_iv_cands (data); i++)
2194     {
2195       struct iv_cand *cand = iv_cand (data, i);
2196
2197       if (cand->important)
2198         bitmap_set_bit (data->important_candidates, i);
2199     }
2200
2201   data->consider_all_candidates = (n_iv_cands (data)
2202                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2203
2204   if (data->consider_all_candidates)
2205     {
2206       /* We will not need "related_cands" bitmaps in this case,
2207          so release them to decrease peak memory consumption.  */
2208       for (i = 0; i < n_iv_uses (data); i++)
2209         {
2210           use = iv_use (data, i);
2211           BITMAP_FREE (use->related_cands);
2212         }
2213     }
2214   else
2215     {
2216       /* Add important candidates to the related_cands bitmaps.  */
2217       for (i = 0; i < n_iv_uses (data); i++)
2218         bitmap_ior_into (iv_use (data, i)->related_cands,
2219                          data->important_candidates);
2220     }
2221 }
2222
2223 /* Finds the candidates for the induction variables.  */
2224
2225 static void
2226 find_iv_candidates (struct ivopts_data *data)
2227 {
2228   /* Add commonly used ivs.  */
2229   add_standard_iv_candidates (data);
2230
2231   /* Add old induction variables.  */
2232   add_old_ivs_candidates (data);
2233
2234   /* Add induction variables derived from uses.  */
2235   add_derived_ivs_candidates (data);
2236
2237   /* Record the important candidates.  */
2238   record_important_candidates (data);
2239 }
2240
2241 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2242    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2243    we allocate a simple list to every use.  */
2244
2245 static void
2246 alloc_use_cost_map (struct ivopts_data *data)
2247 {
2248   unsigned i, size, s, j;
2249
2250   for (i = 0; i < n_iv_uses (data); i++)
2251     {
2252       struct iv_use *use = iv_use (data, i);
2253       bitmap_iterator bi;
2254
2255       if (data->consider_all_candidates)
2256         size = n_iv_cands (data);
2257       else
2258         {
2259           s = 0;
2260           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2261             {
2262               s++;
2263             }
2264
2265           /* Round up to the power of two, so that moduling by it is fast.  */
2266           for (size = 1; size < s; size <<= 1)
2267             continue;
2268         }
2269
2270       use->n_map_members = size;
2271       use->cost_map = XCNEWVEC (struct cost_pair, size);
2272     }
2273 }
2274
2275 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2276    on invariants DEPENDS_ON and that the value used in expressing it
2277    is VALUE.*/
2278
2279 static void
2280 set_use_iv_cost (struct ivopts_data *data,
2281                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2282                  bitmap depends_on, tree value)
2283 {
2284   unsigned i, s;
2285
2286   if (cost == INFTY)
2287     {
2288       BITMAP_FREE (depends_on);
2289       return;
2290     }
2291
2292   if (data->consider_all_candidates)
2293     {
2294       use->cost_map[cand->id].cand = cand;
2295       use->cost_map[cand->id].cost = cost;
2296       use->cost_map[cand->id].depends_on = depends_on;
2297       use->cost_map[cand->id].value = value;
2298       return;
2299     }
2300
2301   /* n_map_members is a power of two, so this computes modulo.  */
2302   s = cand->id & (use->n_map_members - 1);
2303   for (i = s; i < use->n_map_members; i++)
2304     if (!use->cost_map[i].cand)
2305       goto found;
2306   for (i = 0; i < s; i++)
2307     if (!use->cost_map[i].cand)
2308       goto found;
2309
2310   gcc_unreachable ();
2311
2312 found:
2313   use->cost_map[i].cand = cand;
2314   use->cost_map[i].cost = cost;
2315   use->cost_map[i].depends_on = depends_on;
2316   use->cost_map[i].value = value;
2317 }
2318
2319 /* Gets cost of (USE, CANDIDATE) pair.  */
2320
2321 static struct cost_pair *
2322 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2323                  struct iv_cand *cand)
2324 {
2325   unsigned i, s;
2326   struct cost_pair *ret;
2327
2328   if (!cand)
2329     return NULL;
2330
2331   if (data->consider_all_candidates)
2332     {
2333       ret = use->cost_map + cand->id;
2334       if (!ret->cand)
2335         return NULL;
2336
2337       return ret;
2338     }
2339
2340   /* n_map_members is a power of two, so this computes modulo.  */
2341   s = cand->id & (use->n_map_members - 1);
2342   for (i = s; i < use->n_map_members; i++)
2343     if (use->cost_map[i].cand == cand)
2344       return use->cost_map + i;
2345
2346   for (i = 0; i < s; i++)
2347     if (use->cost_map[i].cand == cand)
2348       return use->cost_map + i;
2349
2350   return NULL;
2351 }
2352
2353 /* Returns estimate on cost of computing SEQ.  */
2354
2355 static unsigned
2356 seq_cost (rtx seq)
2357 {
2358   unsigned cost = 0;
2359   rtx set;
2360
2361   for (; seq; seq = NEXT_INSN (seq))
2362     {
2363       set = single_set (seq);
2364       if (set)
2365         cost += rtx_cost (set, SET);
2366       else
2367         cost++;
2368     }
2369
2370   return cost;
2371 }
2372
2373 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2374 static rtx
2375 produce_memory_decl_rtl (tree obj, int *regno)
2376 {
2377   rtx x;
2378
2379   gcc_assert (obj);
2380   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2381     {
2382       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2383       x = gen_rtx_SYMBOL_REF (Pmode, name);
2384       SET_SYMBOL_REF_DECL (x, obj);
2385       x = gen_rtx_MEM (DECL_MODE (obj), x);
2386       targetm.encode_section_info (obj, x, true);
2387     }
2388   else
2389     {
2390       x = gen_raw_REG (Pmode, (*regno)++);
2391       x = gen_rtx_MEM (DECL_MODE (obj), x);
2392     }
2393
2394   return x;
2395 }
2396
2397 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2398    walk_tree.  DATA contains the actual fake register number.  */
2399
2400 static tree
2401 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2402 {
2403   tree obj = NULL_TREE;
2404   rtx x = NULL_RTX;
2405   int *regno = data;
2406
2407   switch (TREE_CODE (*expr_p))
2408     {
2409     case ADDR_EXPR:
2410       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2411            handled_component_p (*expr_p);
2412            expr_p = &TREE_OPERAND (*expr_p, 0))
2413         continue;
2414       obj = *expr_p;
2415       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2416         x = produce_memory_decl_rtl (obj, regno);
2417       break;
2418
2419     case SSA_NAME:
2420       *ws = 0;
2421       obj = SSA_NAME_VAR (*expr_p);
2422       if (!DECL_RTL_SET_P (obj))
2423         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2424       break;
2425
2426     case VAR_DECL:
2427     case PARM_DECL:
2428     case RESULT_DECL:
2429       *ws = 0;
2430       obj = *expr_p;
2431
2432       if (DECL_RTL_SET_P (obj))
2433         break;
2434
2435       if (DECL_MODE (obj) == BLKmode)
2436         x = produce_memory_decl_rtl (obj, regno);
2437       else
2438         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2439
2440       break;
2441
2442     default:
2443       break;
2444     }
2445
2446   if (x)
2447     {
2448       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2449       SET_DECL_RTL (obj, x);
2450     }
2451
2452   return NULL_TREE;
2453 }
2454
2455 /* Determines cost of the computation of EXPR.  */
2456
2457 static unsigned
2458 computation_cost (tree expr)
2459 {
2460   rtx seq, rslt;
2461   tree type = TREE_TYPE (expr);
2462   unsigned cost;
2463   /* Avoid using hard regs in ways which may be unsupported.  */
2464   int regno = LAST_VIRTUAL_REGISTER + 1;
2465
2466   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2467   start_sequence ();
2468   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2469   seq = get_insns ();
2470   end_sequence ();
2471
2472   cost = seq_cost (seq);
2473   if (MEM_P (rslt))
2474     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2475
2476   return cost;
2477 }
2478
2479 /* Returns variable containing the value of candidate CAND at statement AT.  */
2480
2481 static tree
2482 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2483 {
2484   if (stmt_after_increment (loop, cand, stmt))
2485     return cand->var_after;
2486   else
2487     return cand->var_before;
2488 }
2489
2490 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2491    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2492
2493 int
2494 tree_int_cst_sign_bit (tree t)
2495 {
2496   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2497   unsigned HOST_WIDE_INT w;
2498
2499   if (bitno < HOST_BITS_PER_WIDE_INT)
2500     w = TREE_INT_CST_LOW (t);
2501   else
2502     {
2503       w = TREE_INT_CST_HIGH (t);
2504       bitno -= HOST_BITS_PER_WIDE_INT;
2505     }
2506
2507   return (w >> bitno) & 1;
2508 }
2509
2510 /* If we can prove that TOP = cst * BOT for some constant cst,
2511    store cst to MUL and return true.  Otherwise return false.
2512    The returned value is always sign-extended, regardless of the
2513    signedness of TOP and BOT.  */
2514
2515 static bool
2516 constant_multiple_of (tree top, tree bot, double_int *mul)
2517 {
2518   tree mby;
2519   enum tree_code code;
2520   double_int res, p0, p1;
2521   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2522
2523   STRIP_NOPS (top);
2524   STRIP_NOPS (bot);
2525
2526   if (operand_equal_p (top, bot, 0))
2527     {
2528       *mul = double_int_one;
2529       return true;
2530     }
2531
2532   code = TREE_CODE (top);
2533   switch (code)
2534     {
2535     case MULT_EXPR:
2536       mby = TREE_OPERAND (top, 1);
2537       if (TREE_CODE (mby) != INTEGER_CST)
2538         return false;
2539
2540       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2541         return false;
2542
2543       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2544                               precision);
2545       return true;
2546
2547     case PLUS_EXPR:
2548     case MINUS_EXPR:
2549       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2550           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2551         return false;
2552
2553       if (code == MINUS_EXPR)
2554         p1 = double_int_neg (p1);
2555       *mul = double_int_sext (double_int_add (p0, p1), precision);
2556       return true;
2557
2558     case INTEGER_CST:
2559       if (TREE_CODE (bot) != INTEGER_CST)
2560         return false;
2561
2562       p0 = double_int_sext (tree_to_double_int (top), precision);
2563       p1 = double_int_sext (tree_to_double_int (bot), precision);
2564       if (double_int_zero_p (p1))
2565         return false;
2566       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2567                               precision);
2568       return double_int_zero_p (res);
2569
2570     default:
2571       return false;
2572     }
2573 }
2574
2575 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2576    same precision that is at least as wide as the precision of TYPE, stores
2577    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2578    type of A and B.  */
2579
2580 static tree
2581 determine_common_wider_type (tree *a, tree *b)
2582 {
2583   tree wider_type = NULL;
2584   tree suba, subb;
2585   tree atype = TREE_TYPE (*a);
2586
2587   if ((TREE_CODE (*a) == NOP_EXPR
2588        || TREE_CODE (*a) == CONVERT_EXPR))
2589     {
2590       suba = TREE_OPERAND (*a, 0);
2591       wider_type = TREE_TYPE (suba);
2592       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2593         return atype;
2594     }
2595   else
2596     return atype;
2597
2598   if ((TREE_CODE (*b) == NOP_EXPR
2599        || TREE_CODE (*b) == CONVERT_EXPR))
2600     {
2601       subb = TREE_OPERAND (*b, 0);
2602       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2603         return atype;
2604     }
2605   else
2606     return atype;
2607
2608   *a = suba;
2609   *b = subb;
2610   return wider_type;
2611 }
2612
2613 /* Determines the expression by that USE is expressed from induction variable
2614    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2615    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2616
2617 static bool
2618 get_computation_aff (struct loop *loop,
2619                      struct iv_use *use, struct iv_cand *cand, tree at,
2620                      struct affine_tree_combination *aff)
2621 {
2622   tree ubase = use->iv->base;
2623   tree ustep = use->iv->step;
2624   tree cbase = cand->iv->base;
2625   tree cstep = cand->iv->step, cstep_common;
2626   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2627   tree common_type, var;
2628   tree uutype;
2629   aff_tree cbase_aff, var_aff;
2630   double_int rat;
2631
2632   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2633     {
2634       /* We do not have a precision to express the values of use.  */
2635       return false;
2636     }
2637
2638   var = var_at_stmt (loop, cand, at);
2639   uutype = unsigned_type_for (utype);
2640
2641   /* If the conversion is not noop, perform it.  */
2642   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2643     {
2644       cstep = fold_convert (uutype, cstep);
2645       cbase = fold_convert (uutype, cbase);
2646       var = fold_convert (uutype, var);
2647     }
2648
2649   if (!constant_multiple_of (ustep, cstep, &rat))
2650     return false;
2651
2652   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2653      type, we achieve better folding by computing their difference in this
2654      wider type, and cast the result to UUTYPE.  We do not need to worry about
2655      overflows, as all the arithmetics will in the end be performed in UUTYPE
2656      anyway.  */
2657   common_type = determine_common_wider_type (&ubase, &cbase);
2658
2659   /* use = ubase - ratio * cbase + ratio * var.  */
2660   tree_to_aff_combination (ubase, common_type, aff);
2661   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2662   tree_to_aff_combination (var, uutype, &var_aff);
2663
2664   /* We need to shift the value if we are after the increment.  */
2665   if (stmt_after_increment (loop, cand, at))
2666     {
2667       aff_tree cstep_aff;
2668
2669       if (common_type != uutype)
2670         cstep_common = fold_convert (common_type, cstep);
2671       else
2672         cstep_common = cstep;
2673
2674       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2675       aff_combination_add (&cbase_aff, &cstep_aff);
2676     }
2677
2678   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2679   aff_combination_add (aff, &cbase_aff);
2680   if (common_type != uutype)
2681     aff_combination_convert (aff, uutype);
2682
2683   aff_combination_scale (&var_aff, rat);
2684   aff_combination_add (aff, &var_aff);
2685
2686   return true;
2687 }
2688
2689 /* Determines the expression by that USE is expressed from induction variable
2690    CAND at statement AT in LOOP.  The computation is unshared.  */
2691
2692 static tree
2693 get_computation_at (struct loop *loop,
2694                     struct iv_use *use, struct iv_cand *cand, tree at)
2695 {
2696   aff_tree aff;
2697   tree type = TREE_TYPE (use->iv->base);
2698
2699   if (!get_computation_aff (loop, use, cand, at, &aff))
2700     return NULL_TREE;
2701   unshare_aff_combination (&aff);
2702   return fold_convert (type, aff_combination_to_tree (&aff));
2703 }
2704
2705 /* Determines the expression by that USE is expressed from induction variable
2706    CAND in LOOP.  The computation is unshared.  */
2707
2708 static tree
2709 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2710 {
2711   return get_computation_at (loop, use, cand, use->stmt);
2712 }
2713
2714 /* Returns cost of addition in MODE.  */
2715
2716 static unsigned
2717 add_cost (enum machine_mode mode)
2718 {
2719   static unsigned costs[NUM_MACHINE_MODES];
2720   rtx seq;
2721   unsigned cost;
2722
2723   if (costs[mode])
2724     return costs[mode];
2725
2726   start_sequence ();
2727   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2728                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2729                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2730                  NULL_RTX);
2731   seq = get_insns ();
2732   end_sequence ();
2733
2734   cost = seq_cost (seq);
2735   if (!cost)
2736     cost = 1;
2737
2738   costs[mode] = cost;
2739
2740   if (dump_file && (dump_flags & TDF_DETAILS))
2741     fprintf (dump_file, "Addition in %s costs %d\n",
2742              GET_MODE_NAME (mode), cost);
2743   return cost;
2744 }
2745
2746 /* Entry in a hashtable of already known costs for multiplication.  */
2747 struct mbc_entry
2748 {
2749   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2750   enum machine_mode mode;       /* In mode.  */
2751   unsigned cost;                /* The cost.  */
2752 };
2753
2754 /* Counts hash value for the ENTRY.  */
2755
2756 static hashval_t
2757 mbc_entry_hash (const void *entry)
2758 {
2759   const struct mbc_entry *e = entry;
2760
2761   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2762 }
2763
2764 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2765
2766 static int
2767 mbc_entry_eq (const void *entry1, const void *entry2)
2768 {
2769   const struct mbc_entry *e1 = entry1;
2770   const struct mbc_entry *e2 = entry2;
2771
2772   return (e1->mode == e2->mode
2773           && e1->cst == e2->cst);
2774 }
2775
2776 /* Returns cost of multiplication by constant CST in MODE.  */
2777
2778 unsigned
2779 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2780 {
2781   static htab_t costs;
2782   struct mbc_entry **cached, act;
2783   rtx seq;
2784   unsigned cost;
2785
2786   if (!costs)
2787     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2788
2789   act.mode = mode;
2790   act.cst = cst;
2791   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2792   if (*cached)
2793     return (*cached)->cost;
2794
2795   *cached = XNEW (struct mbc_entry);
2796   (*cached)->mode = mode;
2797   (*cached)->cst = cst;
2798
2799   start_sequence ();
2800   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2801                gen_int_mode (cst, mode), NULL_RTX, 0);
2802   seq = get_insns ();
2803   end_sequence ();
2804
2805   cost = seq_cost (seq);
2806
2807   if (dump_file && (dump_flags & TDF_DETAILS))
2808     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2809              (int) cst, GET_MODE_NAME (mode), cost);
2810
2811   (*cached)->cost = cost;
2812
2813   return cost;
2814 }
2815
2816 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2817    validity for a memory reference accessing memory of mode MODE.  */
2818
2819 bool
2820 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2821 {
2822 #define MAX_RATIO 128
2823   static sbitmap valid_mult[MAX_MACHINE_MODE];
2824
2825   if (!valid_mult[mode])
2826     {
2827       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2828       rtx addr;
2829       HOST_WIDE_INT i;
2830
2831       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2832       sbitmap_zero (valid_mult[mode]);
2833       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2834       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2835         {
2836           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2837           if (memory_address_p (mode, addr))
2838             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2839         }
2840
2841       if (dump_file && (dump_flags & TDF_DETAILS))
2842         {
2843           fprintf (dump_file, "  allowed multipliers:");
2844           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2845             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2846               fprintf (dump_file, " %d", (int) i);
2847           fprintf (dump_file, "\n");
2848           fprintf (dump_file, "\n");
2849         }
2850     }
2851
2852   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2853     return false;
2854
2855   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2856 }
2857
2858 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2859    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2860    variable is omitted.  Compute the cost for a memory reference that accesses
2861    a memory location of mode MEM_MODE.
2862
2863    TODO -- there must be some better way.  This all is quite crude.  */
2864
2865 static unsigned
2866 get_address_cost (bool symbol_present, bool var_present,
2867                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2868                   enum machine_mode mem_mode)
2869 {
2870   static bool initialized[MAX_MACHINE_MODE];
2871   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2872   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2873   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2874   unsigned cost, acost;
2875   bool offset_p, ratio_p;
2876   HOST_WIDE_INT s_offset;
2877   unsigned HOST_WIDE_INT mask;
2878   unsigned bits;
2879
2880   if (!initialized[mem_mode])
2881     {
2882       HOST_WIDE_INT i;
2883       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2884       int old_cse_not_expected;
2885       unsigned sym_p, var_p, off_p, rat_p, add_c;
2886       rtx seq, addr, base;
2887       rtx reg0, reg1;
2888
2889       initialized[mem_mode] = true;
2890
2891       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2892
2893       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2894       for (i = start; i <= 1 << 20; i <<= 1)
2895         {
2896           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2897           if (!memory_address_p (mem_mode, addr))
2898             break;
2899         }
2900       max_offset[mem_mode] = i == start ? 0 : i >> 1;
2901       off[mem_mode] = max_offset[mem_mode];
2902
2903       for (i = start; i <= 1 << 20; i <<= 1)
2904         {
2905           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2906           if (!memory_address_p (mem_mode, addr))
2907             break;
2908         }
2909       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2910
2911       if (dump_file && (dump_flags & TDF_DETAILS))
2912         {
2913           fprintf (dump_file, "get_address_cost:\n");
2914           fprintf (dump_file, "  min offset %s %d\n",
2915                    GET_MODE_NAME (mem_mode),
2916                    (int) min_offset[mem_mode]);
2917           fprintf (dump_file, "  max offset %s %d\n",
2918                    GET_MODE_NAME (mem_mode),
2919                    (int) max_offset[mem_mode]);
2920         }
2921
2922       rat[mem_mode] = 1;
2923       for (i = 2; i <= MAX_RATIO; i++)
2924         if (multiplier_allowed_in_address_p (i, mem_mode))
2925           {
2926             rat[mem_mode] = i;
2927             break;
2928           }
2929
2930       /* Compute the cost of various addressing modes.  */
2931       acost = 0;
2932       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2933       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2934
2935       for (i = 0; i < 16; i++)
2936         {
2937           sym_p = i & 1;
2938           var_p = (i >> 1) & 1;
2939           off_p = (i >> 2) & 1;
2940           rat_p = (i >> 3) & 1;
2941
2942           addr = reg0;
2943           if (rat_p)
2944             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2945                                    gen_int_mode (rat[mem_mode], Pmode));
2946
2947           if (var_p)
2948             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2949
2950           if (sym_p)
2951             {
2952               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2953               /* ??? We can run into trouble with some backends by presenting
2954                  it with symbols which havn't been properly passed through
2955                  targetm.encode_section_info.  By setting the local bit, we
2956                  enhance the probability of things working.  */
2957               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
2958
2959               if (off_p)
2960                 base = gen_rtx_fmt_e (CONST, Pmode,
2961                                       gen_rtx_fmt_ee (PLUS, Pmode,
2962                                                       base,
2963                                                       gen_int_mode (off[mem_mode],
2964                                                                     Pmode)));
2965             }
2966           else if (off_p)
2967             base = gen_int_mode (off[mem_mode], Pmode);
2968           else
2969             base = NULL_RTX;
2970
2971           if (base)
2972             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2973
2974           start_sequence ();
2975           /* To avoid splitting addressing modes, pretend that no cse will
2976              follow.  */
2977           old_cse_not_expected = cse_not_expected;
2978           cse_not_expected = true;
2979           addr = memory_address (mem_mode, addr);
2980           cse_not_expected = old_cse_not_expected;
2981           seq = get_insns ();
2982           end_sequence ();
2983
2984           acost = seq_cost (seq);
2985           acost += address_cost (addr, mem_mode);
2986
2987           if (!acost)
2988             acost = 1;
2989           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
2990         }
2991
2992       /* On some targets, it is quite expensive to load symbol to a register,
2993          which makes addresses that contain symbols look much more expensive.
2994          However, the symbol will have to be loaded in any case before the
2995          loop (and quite likely we have it in register already), so it does not
2996          make much sense to penalize them too heavily.  So make some final
2997          tweaks for the SYMBOL_PRESENT modes:
2998
2999          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3000          var is cheaper, use this mode with small penalty.
3001          If VAR_PRESENT is true, try whether the mode with
3002          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3003          if this is the case, use it.  */
3004       add_c = add_cost (Pmode);
3005       for (i = 0; i < 8; i++)
3006         {
3007           var_p = i & 1;
3008           off_p = (i >> 1) & 1;
3009           rat_p = (i >> 2) & 1;
3010
3011           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3012           if (var_p)
3013             acost += add_c;
3014
3015           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3016             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3017         }
3018
3019       if (dump_file && (dump_flags & TDF_DETAILS))
3020         {
3021           fprintf (dump_file, "Address costs:\n");
3022
3023           for (i = 0; i < 16; i++)
3024             {
3025               sym_p = i & 1;
3026               var_p = (i >> 1) & 1;
3027               off_p = (i >> 2) & 1;
3028               rat_p = (i >> 3) & 1;
3029
3030               fprintf (dump_file, "  ");
3031               if (sym_p)
3032                 fprintf (dump_file, "sym + ");
3033               if (var_p)
3034                 fprintf (dump_file, "var + ");
3035               if (off_p)
3036                 fprintf (dump_file, "cst + ");
3037               if (rat_p)
3038                 fprintf (dump_file, "rat * ");
3039
3040               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3041               fprintf (dump_file, "index costs %d\n", acost);
3042             }
3043           fprintf (dump_file, "\n");
3044         }
3045     }
3046
3047   bits = GET_MODE_BITSIZE (Pmode);
3048   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3049   offset &= mask;
3050   if ((offset >> (bits - 1) & 1))
3051     offset |= ~mask;
3052   s_offset = offset;
3053
3054   cost = 0;
3055   offset_p = (s_offset != 0
3056               && min_offset[mem_mode] <= s_offset
3057               && s_offset <= max_offset[mem_mode]);
3058   ratio_p = (ratio != 1
3059              && multiplier_allowed_in_address_p (ratio, mem_mode));
3060
3061   if (ratio != 1 && !ratio_p)
3062     cost += multiply_by_cost (ratio, Pmode);
3063
3064   if (s_offset && !offset_p && !symbol_present)
3065     cost += add_cost (Pmode);
3066
3067   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3068   return cost + acost;
3069 }
3070
3071 /* Estimates cost of forcing expression EXPR into a variable.  */
3072
3073 unsigned
3074 force_expr_to_var_cost (tree expr)
3075 {
3076   static bool costs_initialized = false;
3077   static unsigned integer_cost;
3078   static unsigned symbol_cost;
3079   static unsigned address_cost;
3080   tree op0, op1;
3081   unsigned cost0, cost1, cost;
3082   enum machine_mode mode;
3083
3084   if (!costs_initialized)
3085     {
3086       tree type = build_pointer_type (integer_type_node);
3087       tree var, addr;
3088       rtx x;
3089
3090       var = create_tmp_var_raw (integer_type_node, "test_var");
3091       TREE_STATIC (var) = 1;
3092       x = produce_memory_decl_rtl (var, NULL);
3093       SET_DECL_RTL (var, x);
3094
3095       integer_cost = computation_cost (build_int_cst (integer_type_node,
3096                                                       2000));
3097
3098       addr = build1 (ADDR_EXPR, type, var);
3099       symbol_cost = computation_cost (addr) + 1;
3100
3101       address_cost
3102         = computation_cost (build2 (PLUS_EXPR, type,
3103                                     addr,
3104                                     build_int_cst (type, 2000))) + 1;
3105       if (dump_file && (dump_flags & TDF_DETAILS))
3106         {
3107           fprintf (dump_file, "force_expr_to_var_cost:\n");
3108           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3109           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3110           fprintf (dump_file, "  address %d\n", (int) address_cost);
3111           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3112           fprintf (dump_file, "\n");
3113         }
3114
3115       costs_initialized = true;
3116     }
3117
3118   STRIP_NOPS (expr);
3119
3120   if (SSA_VAR_P (expr))
3121     return 0;
3122
3123   if (TREE_INVARIANT (expr))
3124     {
3125       if (TREE_CODE (expr) == INTEGER_CST)
3126         return integer_cost;
3127
3128       if (TREE_CODE (expr) == ADDR_EXPR)
3129         {
3130           tree obj = TREE_OPERAND (expr, 0);
3131
3132           if (TREE_CODE (obj) == VAR_DECL
3133               || TREE_CODE (obj) == PARM_DECL
3134               || TREE_CODE (obj) == RESULT_DECL)
3135             return symbol_cost;
3136         }
3137
3138       return address_cost;
3139     }
3140
3141   switch (TREE_CODE (expr))
3142     {
3143     case PLUS_EXPR:
3144     case MINUS_EXPR:
3145     case MULT_EXPR:
3146       op0 = TREE_OPERAND (expr, 0);
3147       op1 = TREE_OPERAND (expr, 1);
3148       STRIP_NOPS (op0);
3149       STRIP_NOPS (op1);
3150
3151       if (is_gimple_val (op0))
3152         cost0 = 0;
3153       else
3154         cost0 = force_expr_to_var_cost (op0);
3155
3156       if (is_gimple_val (op1))
3157         cost1 = 0;
3158       else
3159         cost1 = force_expr_to_var_cost (op1);
3160
3161       break;
3162
3163     default:
3164       /* Just an arbitrary value, FIXME.  */
3165       return target_spill_cost;
3166     }
3167
3168   mode = TYPE_MODE (TREE_TYPE (expr));
3169   switch (TREE_CODE (expr))
3170     {
3171     case PLUS_EXPR:
3172     case MINUS_EXPR:
3173       cost = add_cost (mode);
3174       break;
3175
3176     case MULT_EXPR:
3177       if (cst_and_fits_in_hwi (op0))
3178         cost = multiply_by_cost (int_cst_value (op0), mode);
3179       else if (cst_and_fits_in_hwi (op1))
3180         cost = multiply_by_cost (int_cst_value (op1), mode);
3181       else
3182         return target_spill_cost;
3183       break;
3184
3185     default:
3186       gcc_unreachable ();
3187     }
3188
3189   cost += cost0;
3190   cost += cost1;
3191
3192   /* Bound the cost by target_spill_cost.  The parts of complicated
3193      computations often are either loop invariant or at least can
3194      be shared between several iv uses, so letting this grow without
3195      limits would not give reasonable results.  */
3196   return cost < target_spill_cost ? cost : target_spill_cost;
3197 }
3198
3199 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3200    invariants the computation depends on.  */
3201
3202 static unsigned
3203 force_var_cost (struct ivopts_data *data,
3204                 tree expr, bitmap *depends_on)
3205 {
3206   if (depends_on)
3207     {
3208       fd_ivopts_data = data;
3209       walk_tree (&expr, find_depends, depends_on, NULL);
3210     }
3211
3212   return force_expr_to_var_cost (expr);
3213 }
3214
3215 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3216    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3217    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3218    invariants the computation depends on.  */
3219
3220 static unsigned
3221 split_address_cost (struct ivopts_data *data,
3222                     tree addr, bool *symbol_present, bool *var_present,
3223                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3224 {
3225   tree core;
3226   HOST_WIDE_INT bitsize;
3227   HOST_WIDE_INT bitpos;
3228   tree toffset;
3229   enum machine_mode mode;
3230   int unsignedp, volatilep;
3231
3232   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3233                               &unsignedp, &volatilep, false);
3234
3235   if (toffset != 0
3236       || bitpos % BITS_PER_UNIT != 0
3237       || TREE_CODE (core) != VAR_DECL)
3238     {
3239       *symbol_present = false;
3240       *var_present = true;
3241       fd_ivopts_data = data;
3242       walk_tree (&addr, find_depends, depends_on, NULL);
3243       return target_spill_cost;
3244     }
3245
3246   *offset += bitpos / BITS_PER_UNIT;
3247   if (TREE_STATIC (core)
3248       || DECL_EXTERNAL (core))
3249     {
3250       *symbol_present = true;
3251       *var_present = false;
3252       return 0;
3253     }
3254
3255   *symbol_present = false;
3256   *var_present = true;
3257   return 0;
3258 }
3259
3260 /* Estimates cost of expressing difference of addresses E1 - E2 as
3261    var + symbol + offset.  The value of offset is added to OFFSET,
3262    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3263    part is missing.  DEPENDS_ON is a set of the invariants the computation
3264    depends on.  */
3265
3266 static unsigned
3267 ptr_difference_cost (struct ivopts_data *data,
3268                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3269                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3270 {
3271   HOST_WIDE_INT diff = 0;
3272   unsigned cost;
3273
3274   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3275
3276   if (ptr_difference_const (e1, e2, &diff))
3277     {
3278       *offset += diff;
3279       *symbol_present = false;
3280       *var_present = false;
3281       return 0;
3282     }
3283
3284   if (e2 == integer_zero_node)
3285     return split_address_cost (data, TREE_OPERAND (e1, 0),
3286                                symbol_present, var_present, offset, depends_on);
3287
3288   *symbol_present = false;
3289   *var_present = true;
3290
3291   cost = force_var_cost (data, e1, depends_on);
3292   cost += force_var_cost (data, e2, depends_on);
3293   cost += add_cost (Pmode);
3294
3295   return cost;
3296 }
3297
3298 /* Estimates cost of expressing difference E1 - E2 as
3299    var + symbol + offset.  The value of offset is added to OFFSET,
3300    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3301    part is missing.  DEPENDS_ON is a set of the invariants the computation
3302    depends on.  */
3303
3304 static unsigned
3305 difference_cost (struct ivopts_data *data,
3306                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3307                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3308 {
3309   unsigned cost;
3310   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3311   unsigned HOST_WIDE_INT off1, off2;
3312
3313   e1 = strip_offset (e1, &off1);
3314   e2 = strip_offset (e2, &off2);
3315   *offset += off1 - off2;
3316
3317   STRIP_NOPS (e1);
3318   STRIP_NOPS (e2);
3319
3320   if (TREE_CODE (e1) == ADDR_EXPR)
3321     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3322                                 depends_on);
3323   *symbol_present = false;
3324
3325   if (operand_equal_p (e1, e2, 0))
3326     {
3327       *var_present = false;
3328       return 0;
3329     }
3330   *var_present = true;
3331   if (integer_zerop (e2))
3332     return force_var_cost (data, e1, depends_on);
3333
3334   if (integer_zerop (e1))
3335     {
3336       cost = force_var_cost (data, e2, depends_on);
3337       cost += multiply_by_cost (-1, mode);
3338
3339       return cost;
3340     }
3341
3342   cost = force_var_cost (data, e1, depends_on);
3343   cost += force_var_cost (data, e2, depends_on);
3344   cost += add_cost (mode);
3345
3346   return cost;
3347 }
3348
3349 /* Determines the cost of the computation by that USE is expressed
3350    from induction variable CAND.  If ADDRESS_P is true, we just need
3351    to create an address from it, otherwise we want to get it into
3352    register.  A set of invariants we depend on is stored in
3353    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3354
3355 static unsigned
3356 get_computation_cost_at (struct ivopts_data *data,
3357                          struct iv_use *use, struct iv_cand *cand,
3358                          bool address_p, bitmap *depends_on, tree at)
3359 {
3360   tree ubase = use->iv->base, ustep = use->iv->step;
3361   tree cbase, cstep;
3362   tree utype = TREE_TYPE (ubase), ctype;
3363   unsigned HOST_WIDE_INT cstepi, offset = 0;
3364   HOST_WIDE_INT ratio, aratio;
3365   bool var_present, symbol_present;
3366   unsigned cost = 0, n_sums;
3367   double_int rat;
3368
3369   *depends_on = NULL;
3370
3371   /* Only consider real candidates.  */
3372   if (!cand->iv)
3373     return INFTY;
3374
3375   cbase = cand->iv->base;
3376   cstep = cand->iv->step;
3377   ctype = TREE_TYPE (cbase);
3378
3379   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3380     {
3381       /* We do not have a precision to express the values of use.  */
3382       return INFTY;
3383     }
3384
3385   if (address_p)
3386     {
3387       /* Do not try to express address of an object with computation based
3388          on address of a different object.  This may cause problems in rtl
3389          level alias analysis (that does not expect this to be happening,
3390          as this is illegal in C), and would be unlikely to be useful
3391          anyway.  */
3392       if (use->iv->base_object
3393           && cand->iv->base_object
3394           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3395         return INFTY;
3396     }
3397
3398   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3399     {
3400       /* TODO -- add direct handling of this case.  */
3401       goto fallback;
3402     }
3403
3404   /* CSTEPI is removed from the offset in case statement is after the
3405      increment.  If the step is not constant, we use zero instead.
3406      This is a bit imprecise (there is the extra addition), but
3407      redundancy elimination is likely to transform the code so that
3408      it uses value of the variable before increment anyway,
3409      so it is not that much unrealistic.  */
3410   if (cst_and_fits_in_hwi (cstep))
3411     cstepi = int_cst_value (cstep);
3412   else
3413     cstepi = 0;
3414
3415   if (!constant_multiple_of (ustep, cstep, &rat))
3416     return INFTY;
3417
3418   if (double_int_fits_in_shwi_p (rat))
3419     ratio = double_int_to_shwi (rat);
3420   else
3421     return INFTY;
3422
3423   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3424      or ratio == 1, it is better to handle this like
3425
3426      ubase - ratio * cbase + ratio * var
3427
3428      (also holds in the case ratio == -1, TODO.  */
3429
3430   if (cst_and_fits_in_hwi (cbase))
3431     {
3432       offset = - ratio * int_cst_value (cbase);
3433       cost += difference_cost (data,
3434                                ubase, integer_zero_node,
3435                                &symbol_present, &var_present, &offset,
3436                                depends_on);
3437     }
3438   else if (ratio == 1)
3439     {
3440       cost += difference_cost (data,
3441                                ubase, cbase,
3442                                &symbol_present, &var_present, &offset,
3443                                depends_on);
3444     }
3445   else
3446     {
3447       cost += force_var_cost (data, cbase, depends_on);
3448       cost += add_cost (TYPE_MODE (ctype));
3449       cost += difference_cost (data,
3450                                ubase, integer_zero_node,
3451                                &symbol_present, &var_present, &offset,
3452                                depends_on);
3453     }
3454
3455   /* If we are after the increment, the value of the candidate is higher by
3456      one iteration.  */
3457   if (stmt_after_increment (data->current_loop, cand, at))
3458     offset -= ratio * cstepi;
3459
3460   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3461      (symbol/var/const parts may be omitted).  If we are looking for an address,
3462      find the cost of addressing this.  */
3463   if (address_p)
3464     return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3465                                     TYPE_MODE (TREE_TYPE (*use->op_p)));
3466
3467   /* Otherwise estimate the costs for computing the expression.  */
3468   aratio = ratio > 0 ? ratio : -ratio;
3469   if (!symbol_present && !var_present && !offset)
3470     {
3471       if (ratio != 1)
3472         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3473
3474       return cost;
3475     }
3476
3477   if (aratio != 1)
3478     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3479
3480   n_sums = 1;
3481   if (var_present
3482       /* Symbol + offset should be compile-time computable.  */
3483       && (symbol_present || offset))
3484     n_sums++;
3485
3486   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3487
3488 fallback:
3489   {
3490     /* Just get the expression, expand it and measure the cost.  */
3491     tree comp = get_computation_at (data->current_loop, use, cand, at);
3492
3493     if (!comp)
3494       return INFTY;
3495
3496     if (address_p)
3497       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3498
3499     return computation_cost (comp);
3500   }
3501 }
3502
3503 /* Determines the cost of the computation by that USE is expressed
3504    from induction variable CAND.  If ADDRESS_P is true, we just need
3505    to create an address from it, otherwise we want to get it into
3506    register.  A set of invariants we depend on is stored in
3507    DEPENDS_ON.  */
3508
3509 static unsigned
3510 get_computation_cost (struct ivopts_data *data,
3511                       struct iv_use *use, struct iv_cand *cand,
3512                       bool address_p, bitmap *depends_on)
3513 {
3514   return get_computation_cost_at (data,
3515                                   use, cand, address_p, depends_on, use->stmt);
3516 }
3517
3518 /* Determines cost of basing replacement of USE on CAND in a generic
3519    expression.  */
3520
3521 static bool
3522 determine_use_iv_cost_generic (struct ivopts_data *data,
3523                                struct iv_use *use, struct iv_cand *cand)
3524 {
3525   bitmap depends_on;
3526   unsigned cost;
3527
3528   /* The simple case first -- if we need to express value of the preserved
3529      original biv, the cost is 0.  This also prevents us from counting the
3530      cost of increment twice -- once at this use and once in the cost of
3531      the candidate.  */
3532   if (cand->pos == IP_ORIGINAL
3533       && cand->incremented_at == use->stmt)
3534     {
3535       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3536       return true;
3537     }
3538
3539   cost = get_computation_cost (data, use, cand, false, &depends_on);
3540   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3541
3542   return cost != INFTY;
3543 }
3544
3545 /* Determines cost of basing replacement of USE on CAND in an address.  */
3546
3547 static bool
3548 determine_use_iv_cost_address (struct ivopts_data *data,
3549                                struct iv_use *use, struct iv_cand *cand)
3550 {
3551   bitmap depends_on;
3552   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3553
3554   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3555
3556   return cost != INFTY;
3557 }
3558
3559 /* Computes value of candidate CAND at position AT in iteration NITER, and
3560    stores it to VAL.  */
3561
3562 static void
3563 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3564                aff_tree *val)
3565 {
3566   aff_tree step, delta, nit;
3567   struct iv *iv = cand->iv;
3568   tree type = TREE_TYPE (iv->base);
3569
3570   tree_to_aff_combination (iv->step, type, &step);
3571   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3572   aff_combination_convert (&nit, type);
3573   aff_combination_mult (&nit, &step, &delta);
3574   if (stmt_after_increment (loop, cand, at))
3575     aff_combination_add (&delta, &step);
3576
3577   tree_to_aff_combination (iv->base, type, val);
3578   aff_combination_add (val, &delta);
3579 }
3580
3581 /* Returns period of induction variable iv.  */
3582
3583 static tree
3584 iv_period (struct iv *iv)
3585 {
3586   tree step = iv->step, period, type;
3587   tree pow2div;
3588
3589   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3590
3591   /* Period of the iv is gcd (step, type range).  Since type range is power
3592      of two, it suffices to determine the maximum power of two that divides
3593      step.  */
3594   pow2div = num_ending_zeros (step);
3595   type = unsigned_type_for (TREE_TYPE (step));
3596
3597   period = build_low_bits_mask (type,
3598                                 (TYPE_PRECISION (type)
3599                                  - tree_low_cst (pow2div, 1)));
3600
3601   return period;
3602 }
3603
3604 /* Returns the comparison operator used when eliminating the iv USE.  */
3605
3606 static enum tree_code
3607 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3608 {
3609   struct loop *loop = data->current_loop;
3610   basic_block ex_bb;
3611   edge exit;
3612
3613   ex_bb = bb_for_stmt (use->stmt);
3614   exit = EDGE_SUCC (ex_bb, 0);
3615   if (flow_bb_inside_loop_p (loop, exit->dest))
3616     exit = EDGE_SUCC (ex_bb, 1);
3617
3618   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3619 }
3620
3621 /* Check whether it is possible to express the condition in USE by comparison
3622    of candidate CAND.  If so, store the value compared with to BOUND.  */
3623
3624 static bool
3625 may_eliminate_iv (struct ivopts_data *data,
3626                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3627 {
3628   basic_block ex_bb;
3629   edge exit;
3630   tree nit, nit_type;
3631   tree wider_type, period, per_type;
3632   struct loop *loop = data->current_loop;
3633   aff_tree bnd;
3634
3635   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3636     return false;
3637
3638   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3639      for other conditions inside loop body.  */
3640   ex_bb = bb_for_stmt (use->stmt);
3641   if (use->stmt != last_stmt (ex_bb)
3642       || TREE_CODE (use->stmt) != COND_EXPR)
3643     return false;
3644   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3645     return false;
3646
3647   exit = EDGE_SUCC (ex_bb, 0);
3648   if (flow_bb_inside_loop_p (loop, exit->dest))
3649     exit = EDGE_SUCC (ex_bb, 1);
3650   if (flow_bb_inside_loop_p (loop, exit->dest))
3651     return false;
3652
3653   nit = niter_for_exit (data, exit);
3654   if (!nit)
3655     return false;
3656
3657   nit_type = TREE_TYPE (nit);
3658
3659   /* Determine whether we may use the variable to test whether niter iterations
3660      elapsed.  This is the case iff the period of the induction variable is
3661      greater than the number of iterations.  */
3662   period = iv_period (cand->iv);
3663   if (!period)
3664     return false;
3665   per_type = TREE_TYPE (period);
3666
3667   wider_type = TREE_TYPE (period);
3668   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3669     wider_type = per_type;
3670   else
3671     wider_type = nit_type;
3672
3673   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3674                                       fold_convert (wider_type, period),
3675                                       fold_convert (wider_type, nit))))
3676     return false;
3677
3678   cand_value_at (loop, cand, use->stmt, nit, &bnd);
3679   *bound = aff_combination_to_tree (&bnd);
3680   return true;
3681 }
3682
3683 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3684
3685 static bool
3686 determine_use_iv_cost_condition (struct ivopts_data *data,
3687                                  struct iv_use *use, struct iv_cand *cand)
3688 {
3689   tree bound = NULL_TREE;
3690   struct iv *cmp_iv;
3691   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3692   unsigned elim_cost, express_cost, cost;
3693   bool ok;
3694
3695   /* Only consider real candidates.  */
3696   if (!cand->iv)
3697     {
3698       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3699       return false;
3700     }
3701
3702   /* Try iv elimination.  */
3703   if (may_eliminate_iv (data, use, cand, &bound))
3704     elim_cost = force_var_cost (data, bound, &depends_on_elim);
3705   else
3706     elim_cost = INFTY;
3707
3708   /* Try expressing the original giv.  If it is compared with an invariant,
3709      note that we cannot get rid of it.  */
3710   ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3711   gcc_assert (ok);
3712
3713   express_cost = get_computation_cost (data, use, cand, false,
3714                                        &depends_on_express);
3715   fd_ivopts_data = data;
3716   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3717
3718   /* Choose the better approach.  */
3719   if (elim_cost < express_cost)
3720     {
3721       cost = elim_cost;
3722       depends_on = depends_on_elim;
3723       depends_on_elim = NULL;
3724     }
3725   else
3726     {
3727       cost = express_cost;
3728       depends_on = depends_on_express;
3729       depends_on_express = NULL;
3730       bound = NULL_TREE;
3731     }
3732
3733   set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3734
3735   if (depends_on_elim)
3736     BITMAP_FREE (depends_on_elim);
3737   if (depends_on_express)
3738     BITMAP_FREE (depends_on_express);
3739
3740   return cost != INFTY;
3741 }
3742
3743 /* Determines cost of basing replacement of USE on CAND.  Returns false
3744    if USE cannot be based on CAND.  */
3745
3746 static bool
3747 determine_use_iv_cost (struct ivopts_data *data,
3748                        struct iv_use *use, struct iv_cand *cand)
3749 {
3750   switch (use->type)
3751     {
3752     case USE_NONLINEAR_EXPR:
3753       return determine_use_iv_cost_generic (data, use, cand);
3754
3755     case USE_ADDRESS:
3756       return determine_use_iv_cost_address (data, use, cand);
3757
3758     case USE_COMPARE:
3759       return determine_use_iv_cost_condition (data, use, cand);
3760
3761     default:
3762       gcc_unreachable ();
3763     }
3764 }
3765
3766 /* Determines costs of basing the use of the iv on an iv candidate.  */
3767
3768 static void
3769 determine_use_iv_costs (struct ivopts_data *data)
3770 {
3771   unsigned i, j;
3772   struct iv_use *use;
3773   struct iv_cand *cand;
3774   bitmap to_clear = BITMAP_ALLOC (NULL);
3775
3776   alloc_use_cost_map (data);
3777
3778   for (i = 0; i < n_iv_uses (data); i++)
3779     {
3780       use = iv_use (data, i);
3781
3782       if (data->consider_all_candidates)
3783         {
3784           for (j = 0; j < n_iv_cands (data); j++)
3785             {
3786               cand = iv_cand (data, j);
3787               determine_use_iv_cost (data, use, cand);
3788             }
3789         }
3790       else
3791         {
3792           bitmap_iterator bi;
3793
3794           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3795             {
3796               cand = iv_cand (data, j);
3797               if (!determine_use_iv_cost (data, use, cand))
3798                 bitmap_set_bit (to_clear, j);
3799             }
3800
3801           /* Remove the candidates for that the cost is infinite from
3802              the list of related candidates.  */
3803           bitmap_and_compl_into (use->related_cands, to_clear);
3804           bitmap_clear (to_clear);
3805         }
3806     }
3807
3808   BITMAP_FREE (to_clear);
3809
3810   if (dump_file && (dump_flags & TDF_DETAILS))
3811     {
3812       fprintf (dump_file, "Use-candidate costs:\n");
3813
3814       for (i = 0; i < n_iv_uses (data); i++)
3815         {
3816           use = iv_use (data, i);
3817
3818           fprintf (dump_file, "Use %d:\n", i);
3819           fprintf (dump_file, "  cand\tcost\tdepends on\n");
3820           for (j = 0; j < use->n_map_members; j++)
3821             {
3822               if (!use->cost_map[j].cand
3823                   || use->cost_map[j].cost == INFTY)
3824                 continue;
3825
3826               fprintf (dump_file, "  %d\t%d\t",
3827                        use->cost_map[j].cand->id,
3828                        use->cost_map[j].cost);
3829               if (use->cost_map[j].depends_on)
3830                 bitmap_print (dump_file,
3831                               use->cost_map[j].depends_on, "","");
3832               fprintf (dump_file, "\n");
3833             }
3834
3835           fprintf (dump_file, "\n");
3836         }
3837       fprintf (dump_file, "\n");
3838     }
3839 }
3840
3841 /* Determines cost of the candidate CAND.  */
3842
3843 static void
3844 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3845 {
3846   unsigned cost_base, cost_step;
3847   tree base;
3848
3849   if (!cand->iv)
3850     {
3851       cand->cost = 0;
3852       return;
3853     }
3854
3855   /* There are two costs associated with the candidate -- its increment
3856      and its initialization.  The second is almost negligible for any loop
3857      that rolls enough, so we take it just very little into account.  */
3858
3859   base = cand->iv->base;
3860   cost_base = force_var_cost (data, base, NULL);
3861   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3862
3863   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3864
3865   /* Prefer the original iv unless we may gain something by replacing it;
3866      this is not really relevant for artificial ivs created by other
3867      passes.  */
3868   if (cand->pos == IP_ORIGINAL
3869       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3870     cand->cost--;
3871
3872   /* Prefer not to insert statements into latch unless there are some
3873      already (so that we do not create unnecessary jumps).  */
3874   if (cand->pos == IP_END
3875       && empty_block_p (ip_end_pos (data->current_loop)))
3876     cand->cost++;
3877 }
3878
3879 /* Determines costs of computation of the candidates.  */
3880
3881 static void
3882 determine_iv_costs (struct ivopts_data *data)
3883 {
3884   unsigned i;
3885
3886   if (dump_file && (dump_flags & TDF_DETAILS))
3887     {
3888       fprintf (dump_file, "Candidate costs:\n");
3889       fprintf (dump_file, "  cand\tcost\n");
3890     }
3891
3892   for (i = 0; i < n_iv_cands (data); i++)
3893     {
3894       struct iv_cand *cand = iv_cand (data, i);
3895
3896       determine_iv_cost (data, cand);
3897
3898       if (dump_file && (dump_flags & TDF_DETAILS))
3899         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
3900     }
3901
3902   if (dump_file && (dump_flags & TDF_DETAILS))
3903     fprintf (dump_file, "\n");
3904 }
3905
3906 /* Calculates cost for having SIZE induction variables.  */
3907
3908 static unsigned
3909 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3910 {
3911   return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
3912 }
3913
3914 /* For each size of the induction variable set determine the penalty.  */
3915
3916 static void
3917 determine_set_costs (struct ivopts_data *data)
3918 {
3919   unsigned j, n;
3920   tree phi, op;
3921   struct loop *loop = data->current_loop;
3922   bitmap_iterator bi;
3923
3924   /* We use the following model (definitely improvable, especially the
3925      cost function -- TODO):
3926
3927      We estimate the number of registers available (using MD data), name it A.
3928
3929      We estimate the number of registers used by the loop, name it U.  This
3930      number is obtained as the number of loop phi nodes (not counting virtual
3931      registers and bivs) + the number of variables from outside of the loop.
3932
3933      We set a reserve R (free regs that are used for temporary computations,
3934      etc.).  For now the reserve is a constant 3.
3935
3936      Let I be the number of induction variables.
3937
3938      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3939         make a lot of ivs without a reason).
3940      -- if A - R < U + I <= A, the cost is I * PRES_COST
3941      -- if U + I > A, the cost is I * PRES_COST and
3942         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
3943
3944   if (dump_file && (dump_flags & TDF_DETAILS))
3945     {
3946       fprintf (dump_file, "Global costs:\n");
3947       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
3948       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
3949       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
3950       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
3951     }
3952
3953   n = 0;
3954   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3955     {
3956       op = PHI_RESULT (phi);
3957
3958       if (!is_gimple_reg (op))
3959         continue;
3960
3961       if (get_iv (data, op))
3962         continue;
3963
3964       n++;
3965     }
3966
3967   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3968     {
3969       struct version_info *info = ver_info (data, j);
3970
3971       if (info->inv_id && info->has_nonlin_use)
3972         n++;
3973     }
3974
3975   data->regs_used = n;
3976   if (dump_file && (dump_flags & TDF_DETAILS))
3977     fprintf (dump_file, "  regs_used %d\n", n);
3978
3979   if (dump_file && (dump_flags & TDF_DETAILS))
3980     {
3981       fprintf (dump_file, "  cost for size:\n");
3982       fprintf (dump_file, "  ivs\tcost\n");
3983       for (j = 0; j <= 2 * target_avail_regs; j++)
3984         fprintf (dump_file, "  %d\t%d\n", j,
3985                  ivopts_global_cost_for_size (data, j));
3986       fprintf (dump_file, "\n");
3987     }
3988 }
3989
3990 /* Returns true if A is a cheaper cost pair than B.  */
3991
3992 static bool
3993 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3994 {
3995   if (!a)
3996     return false;
3997
3998   if (!b)
3999     return true;
4000
4001   if (a->cost < b->cost)
4002     return true;
4003
4004   if (a->cost > b->cost)
4005     return false;
4006
4007   /* In case the costs are the same, prefer the cheaper candidate.  */
4008   if (a->cand->cost < b->cand->cost)
4009     return true;
4010
4011   return false;
4012 }
4013
4014 /* Computes the cost field of IVS structure.  */
4015
4016 static void
4017 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4018 {
4019   unsigned cost = 0;
4020
4021   cost += ivs->cand_use_cost;
4022   cost += ivs->cand_cost;
4023   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4024
4025   ivs->cost = cost;
4026 }
4027
4028 /* Remove invariants in set INVS to set IVS.  */
4029
4030 static void
4031 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4032 {
4033   bitmap_iterator bi;
4034   unsigned iid;
4035
4036   if (!invs)
4037     return;
4038
4039   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4040     {
4041       ivs->n_invariant_uses[iid]--;
4042       if (ivs->n_invariant_uses[iid] == 0)
4043         ivs->n_regs--;
4044     }
4045 }
4046
4047 /* Set USE not to be expressed by any candidate in IVS.  */
4048
4049 static void
4050 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4051                  struct iv_use *use)
4052 {
4053   unsigned uid = use->id, cid;
4054   struct cost_pair *cp;
4055
4056   cp = ivs->cand_for_use[uid];
4057   if (!cp)
4058     return;
4059   cid = cp->cand->id;
4060
4061   ivs->bad_uses++;
4062   ivs->cand_for_use[uid] = NULL;
4063   ivs->n_cand_uses[cid]--;
4064
4065   if (ivs->n_cand_uses[cid] == 0)
4066     {
4067       bitmap_clear_bit (ivs->cands, cid);
4068       /* Do not count the pseudocandidates.  */
4069       if (cp->cand->iv)
4070         ivs->n_regs--;
4071       ivs->n_cands--;
4072       ivs->cand_cost -= cp->cand->cost;
4073
4074       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4075     }
4076
4077   ivs->cand_use_cost -= cp->cost;
4078
4079   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4080   iv_ca_recount_cost (data, ivs);
4081 }
4082
4083 /* Add invariants in set INVS to set IVS.  */
4084
4085 static void
4086 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4087 {
4088   bitmap_iterator bi;
4089   unsigned iid;
4090
4091   if (!invs)
4092     return;
4093
4094   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4095     {
4096       ivs->n_invariant_uses[iid]++;
4097       if (ivs->n_invariant_uses[iid] == 1)
4098         ivs->n_regs++;
4099     }
4100 }
4101
4102 /* Set cost pair for USE in set IVS to CP.  */
4103
4104 static void
4105 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4106               struct iv_use *use, struct cost_pair *cp)
4107 {
4108   unsigned uid = use->id, cid;
4109
4110   if (ivs->cand_for_use[uid] == cp)
4111     return;
4112
4113   if (ivs->cand_for_use[uid])
4114     iv_ca_set_no_cp (data, ivs, use);
4115
4116   if (cp)
4117     {
4118       cid = cp->cand->id;
4119
4120       ivs->bad_uses--;
4121       ivs->cand_for_use[uid] = cp;
4122       ivs->n_cand_uses[cid]++;
4123       if (ivs->n_cand_uses[cid] == 1)
4124         {
4125           bitmap_set_bit (ivs->cands, cid);
4126           /* Do not count the pseudocandidates.  */
4127           if (cp->cand->iv)
4128             ivs->n_regs++;
4129           ivs->n_cands++;
4130           ivs->cand_cost += cp->cand->cost;
4131
4132           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4133         }
4134
4135       ivs->cand_use_cost += cp->cost;
4136       iv_ca_set_add_invariants (ivs, cp->depends_on);
4137       iv_ca_recount_cost (data, ivs);
4138     }
4139 }
4140
4141 /* Extend set IVS by expressing USE by some of the candidates in it
4142    if possible.  */
4143
4144 static void
4145 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4146                struct iv_use *use)
4147 {
4148   struct cost_pair *best_cp = NULL, *cp;
4149   bitmap_iterator bi;
4150   unsigned i;
4151
4152   gcc_assert (ivs->upto >= use->id);
4153
4154   if (ivs->upto == use->id)
4155     {
4156       ivs->upto++;
4157       ivs->bad_uses++;
4158     }
4159
4160   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4161     {
4162       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4163
4164       if (cheaper_cost_pair (cp, best_cp))
4165         best_cp = cp;
4166     }
4167
4168   iv_ca_set_cp (data, ivs, use, best_cp);
4169 }
4170
4171 /* Get cost for assignment IVS.  */
4172
4173 static unsigned
4174 iv_ca_cost (struct iv_ca *ivs)
4175 {
4176   return (ivs->bad_uses ? INFTY : ivs->cost);
4177 }
4178
4179 /* Returns true if all dependences of CP are among invariants in IVS.  */
4180
4181 static bool
4182 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4183 {
4184   unsigned i;
4185   bitmap_iterator bi;
4186
4187   if (!cp->depends_on)
4188     return true;
4189
4190   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4191     {
4192       if (ivs->n_invariant_uses[i] == 0)
4193         return false;
4194     }
4195
4196   return true;
4197 }
4198
4199 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4200    it before NEXT_CHANGE.  */
4201
4202 static struct iv_ca_delta *
4203 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4204                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4205 {
4206   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4207
4208   change->use = use;
4209   change->old_cp = old_cp;
4210   change->new_cp = new_cp;
4211   change->next_change = next_change;
4212
4213   return change;
4214 }
4215
4216 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4217    are rewritten.  */
4218
4219 static struct iv_ca_delta *
4220 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4221 {
4222   struct iv_ca_delta *last;
4223
4224   if (!l2)
4225     return l1;
4226
4227   if (!l1)
4228     return l2;
4229
4230   for (last = l1; last->next_change; last = last->next_change)
4231     continue;
4232   last->next_change = l2;
4233
4234   return l1;
4235 }
4236
4237 /* Returns candidate by that USE is expressed in IVS.  */
4238
4239 static struct cost_pair *
4240 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4241 {
4242   return ivs->cand_for_use[use->id];
4243 }
4244
4245 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4246
4247 static struct iv_ca_delta *
4248 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4249 {
4250   struct iv_ca_delta *act, *next, *prev = NULL;
4251   struct cost_pair *tmp;
4252
4253   for (act = delta; act; act = next)
4254     {
4255       next = act->next_change;
4256       act->next_change = prev;
4257       prev = act;
4258
4259       tmp = act->old_cp;
4260       act->old_cp = act->new_cp;
4261       act->new_cp = tmp;
4262     }
4263
4264   return prev;
4265 }
4266
4267 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4268    reverted instead.  */
4269
4270 static void
4271 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4272                     struct iv_ca_delta *delta, bool forward)
4273 {
4274   struct cost_pair *from, *to;
4275   struct iv_ca_delta *act;
4276
4277   if (!forward)
4278     delta = iv_ca_delta_reverse (delta);
4279
4280   for (act = delta; act; act = act->next_change)
4281     {
4282       from = act->old_cp;
4283       to = act->new_cp;
4284       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4285       iv_ca_set_cp (data, ivs, act->use, to);
4286     }
4287
4288   if (!forward)
4289     iv_ca_delta_reverse (delta);
4290 }
4291
4292 /* Returns true if CAND is used in IVS.  */
4293
4294 static bool
4295 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4296 {
4297   return ivs->n_cand_uses[cand->id] > 0;
4298 }
4299
4300 /* Returns number of induction variable candidates in the set IVS.  */
4301
4302 static unsigned
4303 iv_ca_n_cands (struct iv_ca *ivs)
4304 {
4305   return ivs->n_cands;
4306 }
4307
4308 /* Free the list of changes DELTA.  */
4309
4310 static void
4311 iv_ca_delta_free (struct iv_ca_delta **delta)
4312 {
4313   struct iv_ca_delta *act, *next;
4314
4315   for (act = *delta; act; act = next)
4316     {
4317       next = act->next_change;
4318       free (act);
4319     }
4320
4321   *delta = NULL;
4322 }
4323
4324 /* Allocates new iv candidates assignment.  */
4325
4326 static struct iv_ca *
4327 iv_ca_new (struct ivopts_data *data)
4328 {
4329   struct iv_ca *nw = XNEW (struct iv_ca);
4330
4331   nw->upto = 0;
4332   nw->bad_uses = 0;
4333   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4334   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4335   nw->cands = BITMAP_ALLOC (NULL);
4336   nw->n_cands = 0;
4337   nw->n_regs = 0;
4338   nw->cand_use_cost = 0;
4339   nw->cand_cost = 0;
4340   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4341   nw->cost = 0;
4342
4343   return nw;
4344 }
4345
4346 /* Free memory occupied by the set IVS.  */
4347
4348 static void
4349 iv_ca_free (struct iv_ca **ivs)
4350 {
4351   free ((*ivs)->cand_for_use);
4352   free ((*ivs)->n_cand_uses);
4353   BITMAP_FREE ((*ivs)->cands);
4354   free ((*ivs)->n_invariant_uses);
4355   free (*ivs);
4356   *ivs = NULL;
4357 }
4358
4359 /* Dumps IVS to FILE.  */
4360
4361 static void
4362 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4363 {
4364   const char *pref = "  invariants ";
4365   unsigned i;
4366
4367   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4368   bitmap_print (file, ivs->cands, "  candidates ","\n");
4369
4370   for (i = 1; i <= data->max_inv_id; i++)
4371     if (ivs->n_invariant_uses[i])
4372       {
4373         fprintf (file, "%s%d", pref, i);
4374         pref = ", ";
4375       }
4376   fprintf (file, "\n");
4377 }
4378
4379 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4380    new set, and store differences in DELTA.  Number of induction variables
4381    in the new set is stored to N_IVS.  */
4382
4383 static unsigned
4384 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4385               struct iv_cand *cand, struct iv_ca_delta **delta,
4386               unsigned *n_ivs)
4387 {
4388   unsigned i, cost;
4389   struct iv_use *use;
4390   struct cost_pair *old_cp, *new_cp;
4391
4392   *delta = NULL;
4393   for (i = 0; i < ivs->upto; i++)
4394     {
4395       use = iv_use (data, i);
4396       old_cp = iv_ca_cand_for_use (ivs, use);
4397
4398       if (old_cp
4399           && old_cp->cand == cand)
4400         continue;
4401
4402       new_cp = get_use_iv_cost (data, use, cand);
4403       if (!new_cp)
4404         continue;
4405
4406       if (!iv_ca_has_deps (ivs, new_cp))
4407         continue;
4408
4409       if (!cheaper_cost_pair (new_cp, old_cp))
4410         continue;
4411
4412       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4413     }
4414
4415   iv_ca_delta_commit (data, ivs, *delta, true);
4416   cost = iv_ca_cost (ivs);
4417   if (n_ivs)
4418     *n_ivs = iv_ca_n_cands (ivs);
4419   iv_ca_delta_commit (data, ivs, *delta, false);
4420
4421   return cost;
4422 }
4423
4424 /* Try narrowing set IVS by removing CAND.  Return the cost of
4425    the new set and store the differences in DELTA.  */
4426
4427 static unsigned
4428 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4429               struct iv_cand *cand, struct iv_ca_delta **delta)
4430 {
4431   unsigned i, ci;
4432   struct iv_use *use;
4433   struct cost_pair *old_cp, *new_cp, *cp;
4434   bitmap_iterator bi;
4435   struct iv_cand *cnd;
4436   unsigned cost;
4437
4438   *delta = NULL;
4439   for (i = 0; i < n_iv_uses (data); i++)
4440     {
4441       use = iv_use (data, i);
4442
4443       old_cp = iv_ca_cand_for_use (ivs, use);
4444       if (old_cp->cand != cand)
4445         continue;
4446
4447       new_cp = NULL;
4448
4449       if (data->consider_all_candidates)
4450         {
4451           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4452             {
4453               if (ci == cand->id)
4454                 continue;
4455
4456               cnd = iv_cand (data, ci);
4457
4458               cp = get_use_iv_cost (data, use, cnd);
4459               if (!cp)
4460                 continue;
4461               if (!iv_ca_has_deps (ivs, cp))
4462                 continue;
4463
4464               if (!cheaper_cost_pair (cp, new_cp))
4465                 continue;
4466
4467               new_cp = cp;
4468             }
4469         }
4470       else
4471         {
4472           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4473             {
4474               if (ci == cand->id)
4475                 continue;
4476
4477               cnd = iv_cand (data, ci);
4478
4479               cp = get_use_iv_cost (data, use, cnd);
4480               if (!cp)
4481                 continue;
4482               if (!iv_ca_has_deps (ivs, cp))
4483                 continue;
4484
4485               if (!cheaper_cost_pair (cp, new_cp))
4486                 continue;
4487
4488               new_cp = cp;
4489             }
4490         }
4491
4492       if (!new_cp)
4493         {
4494           iv_ca_delta_free (delta);
4495           return INFTY;
4496         }
4497
4498       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4499     }
4500
4501   iv_ca_delta_commit (data, ivs, *delta, true);
4502   cost = iv_ca_cost (ivs);
4503   iv_ca_delta_commit (data, ivs, *delta, false);
4504
4505   return cost;
4506 }
4507
4508 /* Try optimizing the set of candidates IVS by removing candidates different
4509    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4510    differences in DELTA.  */
4511
4512 static unsigned
4513 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4514              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4515 {
4516   bitmap_iterator bi;
4517   struct iv_ca_delta *act_delta, *best_delta;
4518   unsigned i, best_cost, acost;
4519   struct iv_cand *cand;
4520
4521   best_delta = NULL;
4522   best_cost = iv_ca_cost (ivs);
4523
4524   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4525     {
4526       cand = iv_cand (data, i);
4527
4528       if (cand == except_cand)
4529         continue;
4530
4531       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4532
4533       if (acost < best_cost)
4534         {
4535           best_cost = acost;
4536           iv_ca_delta_free (&best_delta);
4537           best_delta = act_delta;
4538         }
4539       else
4540         iv_ca_delta_free (&act_delta);
4541     }
4542
4543   if (!best_delta)
4544     {
4545       *delta = NULL;
4546       return best_cost;
4547     }
4548
4549   /* Recurse to possibly remove other unnecessary ivs.  */
4550   iv_ca_delta_commit (data, ivs, best_delta, true);
4551   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4552   iv_ca_delta_commit (data, ivs, best_delta, false);
4553   *delta = iv_ca_delta_join (best_delta, *delta);
4554   return best_cost;
4555 }
4556
4557 /* Tries to extend the sets IVS in the best possible way in order
4558    to express the USE.  */
4559
4560 static bool
4561 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4562                   struct iv_use *use)
4563 {
4564   unsigned best_cost, act_cost;
4565   unsigned i;
4566   bitmap_iterator bi;
4567   struct iv_cand *cand;
4568   struct iv_ca_delta *best_delta = NULL, *act_delta;
4569   struct cost_pair *cp;
4570
4571   iv_ca_add_use (data, ivs, use);
4572   best_cost = iv_ca_cost (ivs);
4573
4574   cp = iv_ca_cand_for_use (ivs, use);
4575   if (cp)
4576     {
4577       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4578       iv_ca_set_no_cp (data, ivs, use);
4579     }
4580
4581   /* First try important candidates.  Only if it fails, try the specific ones.
4582      Rationale -- in loops with many variables the best choice often is to use
4583      just one generic biv.  If we added here many ivs specific to the uses,
4584      the optimization algorithm later would be likely to get stuck in a local
4585      minimum, thus causing us to create too many ivs.  The approach from
4586      few ivs to more seems more likely to be successful -- starting from few
4587      ivs, replacing an expensive use by a specific iv should always be a
4588      win.  */
4589   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4590     {
4591       cand = iv_cand (data, i);
4592
4593       if (iv_ca_cand_used_p (ivs, cand))
4594         continue;
4595
4596       cp = get_use_iv_cost (data, use, cand);
4597       if (!cp)
4598         continue;
4599
4600       iv_ca_set_cp (data, ivs, use, cp);
4601       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4602       iv_ca_set_no_cp (data, ivs, use);
4603       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4604
4605       if (act_cost < best_cost)
4606         {
4607           best_cost = act_cost;
4608
4609           iv_ca_delta_free (&best_delta);
4610           best_delta = act_delta;
4611         }
4612       else
4613         iv_ca_delta_free (&act_delta);
4614     }
4615
4616   if (best_cost == INFTY)
4617     {
4618       for (i = 0; i < use->n_map_members; i++)
4619         {
4620           cp = use->cost_map + i;
4621           cand = cp->cand;
4622           if (!cand)
4623             continue;
4624
4625           /* Already tried this.  */
4626           if (cand->important)
4627             continue;
4628
4629           if (iv_ca_cand_used_p (ivs, cand))
4630             continue;
4631
4632           act_delta = NULL;
4633           iv_ca_set_cp (data, ivs, use, cp);
4634           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4635           iv_ca_set_no_cp (data, ivs, use);
4636           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4637                                        cp, act_delta);
4638
4639           if (act_cost < best_cost)
4640             {
4641               best_cost = act_cost;
4642
4643               if (best_delta)
4644                 iv_ca_delta_free (&best_delta);
4645               best_delta = act_delta;
4646             }
4647           else
4648             iv_ca_delta_free (&act_delta);
4649         }
4650     }
4651
4652   iv_ca_delta_commit (data, ivs, best_delta, true);
4653   iv_ca_delta_free (&best_delta);
4654
4655   return (best_cost != INFTY);
4656 }
4657
4658 /* Finds an initial assignment of candidates to uses.  */
4659
4660 static struct iv_ca *
4661 get_initial_solution (struct ivopts_data *data)
4662 {
4663   struct iv_ca *ivs = iv_ca_new (data);
4664   unsigned i;
4665
4666   for (i = 0; i < n_iv_uses (data); i++)
4667     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4668       {
4669         iv_ca_free (&ivs);
4670         return NULL;
4671       }
4672
4673   return ivs;
4674 }
4675
4676 /* Tries to improve set of induction variables IVS.  */
4677
4678 static bool
4679 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4680 {
4681   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4682   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4683   struct iv_cand *cand;
4684
4685   /* Try extending the set of induction variables by one.  */
4686   for (i = 0; i < n_iv_cands (data); i++)
4687     {
4688       cand = iv_cand (data, i);
4689
4690       if (iv_ca_cand_used_p (ivs, cand))
4691         continue;
4692
4693       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4694       if (!act_delta)
4695         continue;
4696
4697       /* If we successfully added the candidate and the set is small enough,
4698          try optimizing it by removing other candidates.  */
4699       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4700         {
4701           iv_ca_delta_commit (data, ivs, act_delta, true);
4702           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4703           iv_ca_delta_commit (data, ivs, act_delta, false);
4704           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4705         }
4706
4707       if (acost < best_cost)
4708         {
4709           best_cost = acost;
4710           iv_ca_delta_free (&best_delta);
4711           best_delta = act_delta;
4712         }
4713       else
4714         iv_ca_delta_free (&act_delta);
4715     }
4716
4717   if (!best_delta)
4718     {
4719       /* Try removing the candidates from the set instead.  */
4720       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4721
4722       /* Nothing more we can do.  */
4723       if (!best_delta)
4724         return false;
4725     }
4726
4727   iv_ca_delta_commit (data, ivs, best_delta, true);
4728   gcc_assert (best_cost == iv_ca_cost (ivs));
4729   iv_ca_delta_free (&best_delta);
4730   return true;
4731 }
4732
4733 /* Attempts to find the optimal set of induction variables.  We do simple
4734    greedy heuristic -- we try to replace at most one candidate in the selected
4735    solution and remove the unused ivs while this improves the cost.  */
4736
4737 static struct iv_ca *
4738 find_optimal_iv_set (struct ivopts_data *data)
4739 {
4740   unsigned i;
4741   struct iv_ca *set;
4742   struct iv_use *use;
4743
4744   /* Get the initial solution.  */
4745   set = get_initial_solution (data);
4746   if (!set)
4747     {
4748       if (dump_file && (dump_flags & TDF_DETAILS))
4749         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4750       return NULL;
4751     }
4752
4753   if (dump_file && (dump_flags & TDF_DETAILS))
4754     {
4755       fprintf (dump_file, "Initial set of candidates:\n");
4756       iv_ca_dump (data, dump_file, set);
4757     }
4758
4759   while (try_improve_iv_set (data, set))
4760     {
4761       if (dump_file && (dump_flags & TDF_DETAILS))
4762         {
4763           fprintf (dump_file, "Improved to:\n");
4764           iv_ca_dump (data, dump_file, set);
4765         }
4766     }
4767
4768   if (dump_file && (dump_flags & TDF_DETAILS))
4769     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4770
4771   for (i = 0; i < n_iv_uses (data); i++)
4772     {
4773       use = iv_use (data, i);
4774       use->selected = iv_ca_cand_for_use (set, use)->cand;
4775     }
4776
4777   return set;
4778 }
4779
4780 /* Creates a new induction variable corresponding to CAND.  */
4781
4782 static void
4783 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4784 {
4785   block_stmt_iterator incr_pos;
4786   tree base;
4787   bool after = false;
4788
4789   if (!cand->iv)
4790     return;
4791
4792   switch (cand->pos)
4793     {
4794     case IP_NORMAL:
4795       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4796       break;
4797
4798     case IP_END:
4799       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4800       after = true;
4801       break;
4802
4803     case IP_ORIGINAL:
4804       /* Mark that the iv is preserved.  */
4805       name_info (data, cand->var_before)->preserve_biv = true;
4806       name_info (data, cand->var_after)->preserve_biv = true;
4807
4808       /* Rewrite the increment so that it uses var_before directly.  */
4809       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4810
4811       return;
4812     }
4813
4814   gimple_add_tmp_var (cand->var_before);
4815   add_referenced_var (cand->var_before);
4816
4817   base = unshare_expr (cand->iv->base);
4818
4819   create_iv (base, unshare_expr (cand->iv->step),
4820              cand->var_before, data->current_loop,
4821              &incr_pos, after, &cand->var_before, &cand->var_after);
4822 }
4823
4824 /* Creates new induction variables described in SET.  */
4825
4826 static void
4827 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4828 {
4829   unsigned i;
4830   struct iv_cand *cand;
4831   bitmap_iterator bi;
4832
4833   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4834     {
4835       cand = iv_cand (data, i);
4836       create_new_iv (data, cand);
4837     }
4838 }
4839
4840 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4841    is true, remove also the ssa name defined by the statement.  */
4842
4843 static void
4844 remove_statement (tree stmt, bool including_defined_name)
4845 {
4846   if (TREE_CODE (stmt) == PHI_NODE)
4847     {
4848       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4849     }
4850   else
4851     {
4852       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4853
4854       bsi_remove (&bsi, true);
4855       release_defs (stmt);
4856     }
4857 }
4858
4859 /* Rewrites USE (definition of iv used in a nonlinear expression)
4860    using candidate CAND.  */
4861
4862 static void
4863 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4864                             struct iv_use *use, struct iv_cand *cand)
4865 {
4866   tree comp;
4867   tree op, stmts, tgt, ass;
4868   block_stmt_iterator bsi, pbsi;
4869
4870   /* An important special case -- if we are asked to express value of
4871      the original iv by itself, just exit; there is no need to
4872      introduce a new computation (that might also need casting the
4873      variable to unsigned and back).  */
4874   if (cand->pos == IP_ORIGINAL
4875       && cand->incremented_at == use->stmt)
4876     {
4877       tree step, ctype, utype;
4878       enum tree_code incr_code = PLUS_EXPR;
4879
4880       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4881       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4882
4883       step = cand->iv->step;
4884       ctype = TREE_TYPE (step);
4885       utype = TREE_TYPE (cand->var_after);
4886       if (TREE_CODE (step) == NEGATE_EXPR)
4887         {
4888           incr_code = MINUS_EXPR;
4889           step = TREE_OPERAND (step, 0);
4890         }
4891
4892       /* Check whether we may leave the computation unchanged.
4893          This is the case only if it does not rely on other
4894          computations in the loop -- otherwise, the computation
4895          we rely upon may be removed in remove_unused_ivs,
4896          thus leading to ICE.  */
4897       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
4898       if (TREE_CODE (op) == PLUS_EXPR
4899           || TREE_CODE (op) == MINUS_EXPR)
4900         {
4901           if (TREE_OPERAND (op, 0) == cand->var_before)
4902             op = TREE_OPERAND (op, 1);
4903           else if (TREE_CODE (op) == PLUS_EXPR
4904                    && TREE_OPERAND (op, 1) == cand->var_before)
4905             op = TREE_OPERAND (op, 0);
4906           else
4907             op = NULL_TREE;
4908         }
4909       else
4910         op = NULL_TREE;
4911
4912       if (op
4913           && (TREE_CODE (op) == INTEGER_CST
4914               || operand_equal_p (op, step, 0)))
4915         return;
4916
4917       /* Otherwise, add the necessary computations to express
4918          the iv.  */
4919       op = fold_convert (ctype, cand->var_before);
4920       comp = fold_convert (utype,
4921                            build2 (incr_code, ctype, op,
4922                                    unshare_expr (step)));
4923     }
4924   else
4925     {
4926       comp = get_computation (data->current_loop, use, cand);
4927       gcc_assert (comp != NULL_TREE);
4928     }
4929
4930   switch (TREE_CODE (use->stmt))
4931     {
4932     case PHI_NODE:
4933       tgt = PHI_RESULT (use->stmt);
4934
4935       /* If we should keep the biv, do not replace it.  */
4936       if (name_info (data, tgt)->preserve_biv)
4937         return;
4938
4939       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4940       while (!bsi_end_p (pbsi)
4941              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4942         {
4943           bsi = pbsi;
4944           bsi_next (&pbsi);
4945         }
4946       break;
4947
4948     case GIMPLE_MODIFY_STMT:
4949       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
4950       bsi = bsi_for_stmt (use->stmt);
4951       break;
4952
4953     default:
4954       gcc_unreachable ();
4955     }
4956
4957   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4958
4959   if (TREE_CODE (use->stmt) == PHI_NODE)
4960     {
4961       if (stmts)
4962         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4963       ass = build_gimple_modify_stmt (tgt, op);
4964       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4965       remove_statement (use->stmt, false);
4966       SSA_NAME_DEF_STMT (tgt) = ass;
4967     }
4968   else
4969     {
4970       if (stmts)
4971         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4972       GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
4973     }
4974 }
4975
4976 /* Replaces ssa name in index IDX by its basic variable.  Callback for
4977    for_each_index.  */
4978
4979 static bool
4980 idx_remove_ssa_names (tree base, tree *idx,
4981                       void *data ATTRIBUTE_UNUSED)
4982 {
4983   tree *op;
4984
4985   if (TREE_CODE (*idx) == SSA_NAME)
4986     *idx = SSA_NAME_VAR (*idx);
4987
4988   if (TREE_CODE (base) == ARRAY_REF)
4989     {
4990       op = &TREE_OPERAND (base, 2);
4991       if (*op
4992           && TREE_CODE (*op) == SSA_NAME)
4993         *op = SSA_NAME_VAR (*op);
4994       op = &TREE_OPERAND (base, 3);
4995       if (*op
4996           && TREE_CODE (*op) == SSA_NAME)
4997         *op = SSA_NAME_VAR (*op);
4998     }
4999
5000   return true;
5001 }
5002
5003 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5004
5005 static tree
5006 unshare_and_remove_ssa_names (tree ref)
5007 {
5008   ref = unshare_expr (ref);
5009   for_each_index (&ref, idx_remove_ssa_names, NULL);
5010
5011   return ref;
5012 }
5013
5014 /* Extract the alias analysis info for the memory reference REF.  There are
5015    several ways how this information may be stored and what precisely is
5016    its semantics depending on the type of the reference, but there always is
5017    somewhere hidden one _DECL node that is used to determine the set of
5018    virtual operands for the reference.  The code below deciphers this jungle
5019    and extracts this single useful piece of information.  */
5020
5021 static tree
5022 get_ref_tag (tree ref, tree orig)
5023 {
5024   tree var = get_base_address (ref);
5025   tree aref = NULL_TREE, tag, sv;
5026   HOST_WIDE_INT offset, size, maxsize;
5027
5028   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5029     {
5030       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5031       if (ref)
5032         break;
5033     }
5034
5035   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5036     return unshare_expr (sv);
5037
5038   if (!var)
5039     return NULL_TREE;
5040
5041   if (TREE_CODE (var) == INDIRECT_REF)
5042     {
5043       /* If the base is a dereference of a pointer, first check its name memory
5044          tag.  If it does not have one, use its symbol memory tag.  */
5045       var = TREE_OPERAND (var, 0);
5046       if (TREE_CODE (var) != SSA_NAME)
5047         return NULL_TREE;
5048
5049       if (SSA_NAME_PTR_INFO (var))
5050         {
5051           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5052           if (tag)
5053             return tag;
5054         }
5055
5056       var = SSA_NAME_VAR (var);
5057       tag = symbol_mem_tag (var);
5058       gcc_assert (tag != NULL_TREE);
5059       return tag;
5060     }
5061   else
5062     {
5063       if (!DECL_P (var))
5064         return NULL_TREE;
5065
5066       tag = symbol_mem_tag (var);
5067       if (tag)
5068         return tag;
5069
5070       return var;
5071     }
5072 }
5073
5074 /* Copies the reference information from OLD_REF to NEW_REF.  */
5075
5076 static void
5077 copy_ref_info (tree new_ref, tree old_ref)
5078 {
5079   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5080     copy_mem_ref_info (new_ref, old_ref);
5081   else
5082     {
5083       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5084       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5085     }
5086 }
5087
5088 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5089
5090 static void
5091 rewrite_use_address (struct ivopts_data *data,
5092                      struct iv_use *use, struct iv_cand *cand)
5093 {
5094   aff_tree aff;
5095   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5096   tree ref;
5097   bool ok;
5098
5099   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5100   gcc_assert (ok);
5101   unshare_aff_combination (&aff);
5102
5103   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5104   copy_ref_info (ref, *use->op_p);
5105   *use->op_p = ref;
5106 }
5107
5108 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5109    candidate CAND.  */
5110
5111 static void
5112 rewrite_use_compare (struct ivopts_data *data,
5113                      struct iv_use *use, struct iv_cand *cand)
5114 {
5115   tree comp, *var_p, op, bound;
5116   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5117   enum tree_code compare;
5118   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5119   bool ok;
5120
5121   bound = cp->value;
5122   if (bound)
5123     {
5124       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5125       tree var_type = TREE_TYPE (var);
5126
5127       compare = iv_elimination_compare (data, use);
5128       bound = unshare_expr (fold_convert (var_type, bound));
5129       op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE);
5130
5131       *use->op_p = build2 (compare, boolean_type_node, var, op);
5132       return;
5133     }
5134
5135   /* The induction variable elimination failed; just express the original
5136      giv.  */
5137   comp = get_computation (data->current_loop, use, cand);
5138   gcc_assert (comp != NULL_TREE);
5139
5140   ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5141   gcc_assert (ok);
5142
5143   *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p));
5144 }
5145
5146 /* Rewrites USE using candidate CAND.  */
5147
5148 static void
5149 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5150 {
5151   push_stmt_changes (&use->stmt);
5152
5153   switch (use->type)
5154     {
5155       case USE_NONLINEAR_EXPR:
5156         rewrite_use_nonlinear_expr (data, use, cand);
5157         break;
5158
5159       case USE_ADDRESS:
5160         rewrite_use_address (data, use, cand);
5161         break;
5162
5163       case USE_COMPARE:
5164         rewrite_use_compare (data, use, cand);
5165         break;
5166
5167       default:
5168         gcc_unreachable ();
5169     }
5170
5171   pop_stmt_changes (&use->stmt);
5172 }
5173
5174 /* Rewrite the uses using the selected induction variables.  */
5175
5176 static void
5177 rewrite_uses (struct ivopts_data *data)
5178 {
5179   unsigned i;
5180   struct iv_cand *cand;
5181   struct iv_use *use;
5182
5183   for (i = 0; i < n_iv_uses (data); i++)
5184     {
5185       use = iv_use (data, i);
5186       cand = use->selected;
5187       gcc_assert (cand);
5188
5189       rewrite_use (data, use, cand);
5190     }
5191 }
5192
5193 /* Removes the ivs that are not used after rewriting.  */
5194
5195 static void
5196 remove_unused_ivs (struct ivopts_data *data)
5197 {
5198   unsigned j;
5199   bitmap_iterator bi;
5200
5201   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5202     {
5203       struct version_info *info;
5204
5205       info = ver_info (data, j);
5206       if (info->iv
5207           && !integer_zerop (info->iv->step)
5208           && !info->inv_id
5209           && !info->iv->have_use_for
5210           && !info->preserve_biv)
5211         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5212     }
5213 }
5214
5215 /* Frees data allocated by the optimization of a single loop.  */
5216
5217 static void
5218 free_loop_data (struct ivopts_data *data)
5219 {
5220   unsigned i, j;
5221   bitmap_iterator bi;
5222   tree obj;
5223
5224   if (data->niters)
5225     {
5226       pointer_map_destroy (data->niters);
5227       data->niters = NULL;
5228     }
5229
5230   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5231     {
5232       struct version_info *info;
5233
5234       info = ver_info (data, i);
5235       if (info->iv)
5236         free (info->iv);
5237       info->iv = NULL;
5238       info->has_nonlin_use = false;
5239       info->preserve_biv = false;
5240       info->inv_id = 0;
5241     }
5242   bitmap_clear (data->relevant);
5243   bitmap_clear (data->important_candidates);
5244
5245   for (i = 0; i < n_iv_uses (data); i++)
5246     {
5247       struct iv_use *use = iv_use (data, i);
5248
5249       free (use->iv);
5250       BITMAP_FREE (use->related_cands);
5251       for (j = 0; j < use->n_map_members; j++)
5252         if (use->cost_map[j].depends_on)
5253           BITMAP_FREE (use->cost_map[j].depends_on);
5254       free (use->cost_map);
5255       free (use);
5256     }
5257   VEC_truncate (iv_use_p, data->iv_uses, 0);
5258
5259   for (i = 0; i < n_iv_cands (data); i++)
5260     {
5261       struct iv_cand *cand = iv_cand (data, i);
5262
5263       if (cand->iv)
5264         free (cand->iv);
5265       if (cand->depends_on)
5266         BITMAP_FREE (cand->depends_on);
5267       free (cand);
5268     }
5269   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5270
5271   if (data->version_info_size < num_ssa_names)
5272     {
5273       data->version_info_size = 2 * num_ssa_names;
5274       free (data->version_info);
5275       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5276     }
5277
5278   data->max_inv_id = 0;
5279
5280   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5281     SET_DECL_RTL (obj, NULL_RTX);
5282
5283   VEC_truncate (tree, decl_rtl_to_reset, 0);
5284 }
5285
5286 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5287    loop tree.  */
5288
5289 static void
5290 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5291 {
5292   free_loop_data (data);
5293   free (data->version_info);
5294   BITMAP_FREE (data->relevant);
5295   BITMAP_FREE (data->important_candidates);
5296
5297   VEC_free (tree, heap, decl_rtl_to_reset);
5298   VEC_free (iv_use_p, heap, data->iv_uses);
5299   VEC_free (iv_cand_p, heap, data->iv_candidates);
5300 }
5301
5302 /* Optimizes the LOOP.  Returns true if anything changed.  */
5303
5304 static bool
5305 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5306 {
5307   bool changed = false;
5308   struct iv_ca *iv_ca;
5309   edge exit;
5310
5311   gcc_assert (!data->niters);
5312   data->current_loop = loop;
5313
5314   if (dump_file && (dump_flags & TDF_DETAILS))
5315     {
5316       fprintf (dump_file, "Processing loop %d\n", loop->num);
5317
5318       exit = single_dom_exit (loop);
5319       if (exit)
5320         {
5321           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5322                    exit->src->index, exit->dest->index);
5323           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5324           fprintf (dump_file, "\n");
5325         }
5326
5327       fprintf (dump_file, "\n");
5328     }
5329
5330   /* For each ssa name determines whether it behaves as an induction variable
5331      in some loop.  */
5332   if (!find_induction_variables (data))
5333     goto finish;
5334
5335   /* Finds interesting uses (item 1).  */
5336   find_interesting_uses (data);
5337   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5338     goto finish;
5339
5340   /* Finds candidates for the induction variables (item 2).  */
5341   find_iv_candidates (data);
5342
5343   /* Calculates the costs (item 3, part 1).  */
5344   determine_use_iv_costs (data);
5345   determine_iv_costs (data);
5346   determine_set_costs (data);
5347
5348   /* Find the optimal set of induction variables (item 3, part 2).  */
5349   iv_ca = find_optimal_iv_set (data);
5350   if (!iv_ca)
5351     goto finish;
5352   changed = true;
5353
5354   /* Create the new induction variables (item 4, part 1).  */
5355   create_new_ivs (data, iv_ca);
5356   iv_ca_free (&iv_ca);
5357
5358   /* Rewrite the uses (item 4, part 2).  */
5359   rewrite_uses (data);
5360
5361   /* Remove the ivs that are unused after rewriting.  */
5362   remove_unused_ivs (data);
5363
5364   /* We have changed the structure of induction variables; it might happen
5365      that definitions in the scev database refer to some of them that were
5366      eliminated.  */
5367   scev_reset ();
5368
5369 finish:
5370   free_loop_data (data);
5371
5372   return changed;
5373 }
5374
5375 /* Main entry point.  Optimizes induction variables in loops.  */
5376
5377 void
5378 tree_ssa_iv_optimize (void)
5379 {
5380   struct loop *loop;
5381   struct ivopts_data data;
5382   loop_iterator li;
5383
5384   tree_ssa_iv_optimize_init (&data);
5385
5386   /* Optimize the loops starting with the innermost ones.  */
5387   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5388     {
5389       if (dump_file && (dump_flags & TDF_DETAILS))
5390         flow_loop_dump (loop, dump_file, NULL, 1);
5391
5392       tree_ssa_iv_optimize_loop (&data, loop);
5393     }
5394
5395   tree_ssa_iv_optimize_finalize (&data);
5396 }