gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "pointer-set.h"
  87 #include "hashtab.h"
  88 #include "tree-chrec.h"
  89 #include "tree-scalar-evolution.h"
  90 #include "cfgloop.h"
  91 #include "params.h"
  92 #include "langhooks.h"
  93 #include "tree-affine.h"
  94 #include "target.h"
  95
  96 /* The infinite cost.  */
  97 #define INFTY 10000000
  98
  99 /* The expected number of loop iterations.  TODO -- use profiling instead of
 100    this.  */
 101 #define AVG_LOOP_NITER(LOOP) 5
 102
 103
 104 /* Representation of the induction variable.  */
 105 struct iv
 106 {
 107   tree base;            /* Initial value of the iv.  */
 108   tree base_object;     /* A memory object to that the induction variable points.  */
 109   tree step;            /* Step of the iv (constant only).  */
 110   tree ssa_name;        /* The ssa name with the value.  */
 111   bool biv_p;           /* Is it a biv?  */
 112   bool have_use_for;    /* Do we already have a use for it?  */
 113   unsigned use_id;      /* The identifier in the use if it is the case.  */
 114 };
 115
 116 /* Per-ssa version information (induction variable descriptions, etc.).  */
 117 struct version_info
 118 {
 119   tree name;            /* The ssa name.  */
 120   struct iv *iv;        /* Induction variable description.  */
 121   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 122                            an expression that is not an induction variable.  */
 123   unsigned inv_id;      /* Id of an invariant.  */
 124   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 125 };
 126
 127 /* Types of uses.  */
 128 enum use_type
 129 {
 130   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 131   USE_ADDRESS,          /* Use in an address.  */
 132   USE_COMPARE           /* Use is a compare.  */
 133 };
 134
 135 /* The candidate - cost pair.  */
 136 struct cost_pair
 137 {
 138   struct iv_cand *cand; /* The candidate.  */
 139   unsigned cost;        /* The cost.  */
 140   bitmap depends_on;    /* The list of invariants that have to be
 141                            preserved.  */
 142   tree value;           /* For final value elimination, the expression for
 143                            the final value of the iv.  For iv elimination,
 144                            the new bound to compare with.  */
 145 };
 146
 147 /* Use.  */
 148 struct iv_use
 149 {
 150   unsigned id;          /* The id of the use.  */
 151   enum use_type type;   /* Type of the use.  */
 152   struct iv *iv;        /* The induction variable it is based on.  */
 153   tree stmt;            /* Statement in that it occurs.  */
 154   tree *op_p;           /* The place where it occurs.  */
 155   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 156                            important ones.  */
 157
 158   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 159   struct cost_pair *cost_map;
 160                         /* The costs wrto the iv candidates.  */
 161
 162   struct iv_cand *selected;
 163                         /* The selected candidate.  */
 164 };
 165
 166 /* The position where the iv is computed.  */
 167 enum iv_position
 168 {
 169   IP_NORMAL,            /* At the end, just before the exit condition.  */
 170   IP_END,               /* At the end of the latch block.  */
 171   IP_ORIGINAL           /* The original biv.  */
 172 };
 173
 174 /* The induction variable candidate.  */
 175 struct iv_cand
 176 {
 177   unsigned id;          /* The number of the candidate.  */
 178   bool important;       /* Whether this is an "important" candidate, i.e. such
 179                            that it should be considered by all uses.  */
 180   enum iv_position pos; /* Where it is computed.  */
 181   tree incremented_at;  /* For original biv, the statement where it is
 182                            incremented.  */
 183   tree var_before;      /* The variable used for it before increment.  */
 184   tree var_after;       /* The variable used for it after increment.  */
 185   struct iv *iv;        /* The value of the candidate.  NULL for
 186                            "pseudocandidate" used to indicate the possibility
 187                            to replace the final value of an iv by direct
 188                            computation of the value.  */
 189   unsigned cost;        /* Cost of the candidate.  */
 190   bitmap depends_on;    /* The list of invariants that are used in step of the
 191                            biv.  */
 192 };
 193
 194 /* The data used by the induction variable optimizations.  */
 195
 196 typedef struct iv_use *iv_use_p;
 197 DEF_VEC_P(iv_use_p);
 198 DEF_VEC_ALLOC_P(iv_use_p,heap);
 199
 200 typedef struct iv_cand *iv_cand_p;
 201 DEF_VEC_P(iv_cand_p);
 202 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 203
 204 struct ivopts_data
 205 {
 206   /* The currently optimized loop.  */
 207   struct loop *current_loop;
 208
 209   /* Number of registers used in it.  */
 210   unsigned regs_used;
 211
 212   /* Numbers of iterations for all exits of the current loop.  */
 213   struct pointer_map_t *niters;
 214
 215   /* The size of version_info array allocated.  */
 216   unsigned version_info_size;
 217
 218   /* The array of information for the ssa names.  */
 219   struct version_info *version_info;
 220
 221   /* The bitmap of indices in version_info whose value was changed.  */
 222   bitmap relevant;
 223
 224   /* The maximum invariant id.  */
 225   unsigned max_inv_id;
 226
 227   /* The uses of induction variables.  */
 228   VEC(iv_use_p,heap) *iv_uses;
 229
 230   /* The candidates.  */
 231   VEC(iv_cand_p,heap) *iv_candidates;
 232
 233   /* A bitmap of important candidates.  */
 234   bitmap important_candidates;
 235
 236   /* Whether to consider just related and important candidates when replacing a
 237      use.  */
 238   bool consider_all_candidates;
 239 };
 240
 241 /* An assignment of iv candidates to uses.  */
 242
 243 struct iv_ca
 244 {
 245   /* The number of uses covered by the assignment.  */
 246   unsigned upto;
 247
 248   /* Number of uses that cannot be expressed by the candidates in the set.  */
 249   unsigned bad_uses;
 250
 251   /* Candidate assigned to a use, together with the related costs.  */
 252   struct cost_pair **cand_for_use;
 253
 254   /* Number of times each candidate is used.  */
 255   unsigned *n_cand_uses;
 256
 257   /* The candidates used.  */
 258   bitmap cands;
 259
 260   /* The number of candidates in the set.  */
 261   unsigned n_cands;
 262
 263   /* Total number of registers needed.  */
 264   unsigned n_regs;
 265
 266   /* Total cost of expressing uses.  */
 267   unsigned cand_use_cost;
 268
 269   /* Total cost of candidates.  */
 270   unsigned cand_cost;
 271
 272   /* Number of times each invariant is used.  */
 273   unsigned *n_invariant_uses;
 274
 275   /* Total cost of the assignment.  */
 276   unsigned cost;
 277 };
 278
 279 /* Difference of two iv candidate assignments.  */
 280
 281 struct iv_ca_delta
 282 {
 283   /* Changed use.  */
 284   struct iv_use *use;
 285
 286   /* An old assignment (for rollback purposes).  */
 287   struct cost_pair *old_cp;
 288
 289   /* A new assignment.  */
 290   struct cost_pair *new_cp;
 291
 292   /* Next change in the list.  */
 293   struct iv_ca_delta *next_change;
 294 };
 295
 296 /* Bound on number of candidates below that all candidates are considered.  */
 297
 298 #define CONSIDER_ALL_CANDIDATES_BOUND \
 299   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 300
 301 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 302    optimizing such a loop would help, and it would take ages).  */
 303
 304 #define MAX_CONSIDERED_USES \
 305   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 306
 307 /* If there are at most this number of ivs in the set, try removing unnecessary
 308    ivs from the set always.  */
 309
 310 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 311   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 312
 313 /* The list of trees for that the decl_rtl field must be reset is stored
 314    here.  */
 315
 316 static VEC(tree,heap) *decl_rtl_to_reset;
 317
 318 /* Number of uses recorded in DATA.  */
 319
 320 static inline unsigned
 321 n_iv_uses (struct ivopts_data *data)
 322 {
 323   return VEC_length (iv_use_p, data->iv_uses);
 324 }
 325
 326 /* Ith use recorded in DATA.  */
 327
 328 static inline struct iv_use *
 329 iv_use (struct ivopts_data *data, unsigned i)
 330 {
 331   return VEC_index (iv_use_p, data->iv_uses, i);
 332 }
 333
 334 /* Number of candidates recorded in DATA.  */
 335
 336 static inline unsigned
 337 n_iv_cands (struct ivopts_data *data)
 338 {
 339   return VEC_length (iv_cand_p, data->iv_candidates);
 340 }
 341
 342 /* Ith candidate recorded in DATA.  */
 343
 344 static inline struct iv_cand *
 345 iv_cand (struct ivopts_data *data, unsigned i)
 346 {
 347   return VEC_index (iv_cand_p, data->iv_candidates, i);
 348 }
 349
 350 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 351
 352 edge
 353 single_dom_exit (struct loop *loop)
 354 {
 355   edge exit = single_exit (loop);
 356
 357   if (!exit)
 358     return NULL;
 359
 360   if (!just_once_each_iteration_p (loop, exit->src))
 361     return NULL;
 362
 363   return exit;
 364 }
 365
 366 /* Dumps information about the induction variable IV to FILE.  */
 367
 368 extern void dump_iv (FILE *, struct iv *);
 369 void
 370 dump_iv (FILE *file, struct iv *iv)
 371 {
 372   if (iv->ssa_name)
 373     {
 374       fprintf (file, "ssa name ");
 375       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 376       fprintf (file, "\n");
 377     }
 378
 379   fprintf (file, "  type ");
 380   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 381   fprintf (file, "\n");
 382
 383   if (iv->step)
 384     {
 385       fprintf (file, "  base ");
 386       print_generic_expr (file, iv->base, TDF_SLIM);
 387       fprintf (file, "\n");
 388
 389       fprintf (file, "  step ");
 390       print_generic_expr (file, iv->step, TDF_SLIM);
 391       fprintf (file, "\n");
 392     }
 393   else
 394     {
 395       fprintf (file, "  invariant ");
 396       print_generic_expr (file, iv->base, TDF_SLIM);
 397       fprintf (file, "\n");
 398     }
 399
 400   if (iv->base_object)
 401     {
 402       fprintf (file, "  base object ");
 403       print_generic_expr (file, iv->base_object, TDF_SLIM);
 404       fprintf (file, "\n");
 405     }
 406
 407   if (iv->biv_p)
 408     fprintf (file, "  is a biv\n");
 409 }
 410
 411 /* Dumps information about the USE to FILE.  */
 412
 413 extern void dump_use (FILE *, struct iv_use *);
 414 void
 415 dump_use (FILE *file, struct iv_use *use)
 416 {
 417   fprintf (file, "use %d\n", use->id);
 418
 419   switch (use->type)
 420     {
 421     case USE_NONLINEAR_EXPR:
 422       fprintf (file, "  generic\n");
 423       break;
 424
 425     case USE_ADDRESS:
 426       fprintf (file, "  address\n");
 427       break;
 428
 429     case USE_COMPARE:
 430       fprintf (file, "  compare\n");
 431       break;
 432
 433     default:
 434       gcc_unreachable ();
 435     }
 436
 437   fprintf (file, "  in statement ");
 438   print_generic_expr (file, use->stmt, TDF_SLIM);
 439   fprintf (file, "\n");
 440
 441   fprintf (file, "  at position ");
 442   if (use->op_p)
 443     print_generic_expr (file, *use->op_p, TDF_SLIM);
 444   fprintf (file, "\n");
 445
 446   dump_iv (file, use->iv);
 447
 448   if (use->related_cands)
 449     {
 450       fprintf (file, "  related candidates ");
 451       dump_bitmap (file, use->related_cands);
 452     }
 453 }
 454
 455 /* Dumps information about the uses to FILE.  */
 456
 457 extern void dump_uses (FILE *, struct ivopts_data *);
 458 void
 459 dump_uses (FILE *file, struct ivopts_data *data)
 460 {
 461   unsigned i;
 462   struct iv_use *use;
 463
 464   for (i = 0; i < n_iv_uses (data); i++)
 465     {
 466       use = iv_use (data, i);
 467
 468       dump_use (file, use);
 469       fprintf (file, "\n");
 470     }
 471 }
 472
 473 /* Dumps information about induction variable candidate CAND to FILE.  */
 474
 475 extern void dump_cand (FILE *, struct iv_cand *);
 476 void
 477 dump_cand (FILE *file, struct iv_cand *cand)
 478 {
 479   struct iv *iv = cand->iv;
 480
 481   fprintf (file, "candidate %d%s\n",
 482            cand->id, cand->important ? " (important)" : "");
 483
 484   if (cand->depends_on)
 485     {
 486       fprintf (file, "  depends on ");
 487       dump_bitmap (file, cand->depends_on);
 488     }
 489
 490   if (!iv)
 491     {
 492       fprintf (file, "  final value replacement\n");
 493       return;
 494     }
 495
 496   switch (cand->pos)
 497     {
 498     case IP_NORMAL:
 499       fprintf (file, "  incremented before exit test\n");
 500       break;
 501
 502     case IP_END:
 503       fprintf (file, "  incremented at end\n");
 504       break;
 505
 506     case IP_ORIGINAL:
 507       fprintf (file, "  original biv\n");
 508       break;
 509     }
 510
 511   dump_iv (file, iv);
 512 }
 513
 514 /* Returns the info for ssa version VER.  */
 515
 516 static inline struct version_info *
 517 ver_info (struct ivopts_data *data, unsigned ver)
 518 {
 519   return data->version_info + ver;
 520 }
 521
 522 /* Returns the info for ssa name NAME.  */
 523
 524 static inline struct version_info *
 525 name_info (struct ivopts_data *data, tree name)
 526 {
 527   return ver_info (data, SSA_NAME_VERSION (name));
 528 }
 529
 530 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 531    emitted in LOOP.  */
 532
 533 static bool
 534 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 535 {
 536   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 537
 538   gcc_assert (bb);
 539
 540   if (sbb == loop->latch)
 541     return true;
 542
 543   if (sbb != bb)
 544     return false;
 545
 546   return stmt == last_stmt (bb);
 547 }
 548
 549 /* Returns true if STMT if after the place where the original induction
 550    variable CAND is incremented.  */
 551
 552 static bool
 553 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 554 {
 555   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 556   basic_block stmt_bb = bb_for_stmt (stmt);
 557   block_stmt_iterator bsi;
 558
 559   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 560     return false;
 561
 562   if (stmt_bb != cand_bb)
 563     return true;
 564
 565   /* Scan the block from the end, since the original ivs are usually
 566      incremented at the end of the loop body.  */
 567   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 568     {
 569       if (bsi_stmt (bsi) == cand->incremented_at)
 570         return false;
 571       if (bsi_stmt (bsi) == stmt)
 572         return true;
 573     }
 574 }
 575
 576 /* Returns true if STMT if after the place where the induction variable
 577    CAND is incremented in LOOP.  */
 578
 579 static bool
 580 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 581 {
 582   switch (cand->pos)
 583     {
 584     case IP_END:
 585       return false;
 586
 587     case IP_NORMAL:
 588       return stmt_after_ip_normal_pos (loop, stmt);
 589
 590     case IP_ORIGINAL:
 591       return stmt_after_ip_original_pos (cand, stmt);
 592
 593     default:
 594       gcc_unreachable ();
 595     }
 596 }
 597
 598 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 599
 600 static bool
 601 abnormal_ssa_name_p (tree exp)
 602 {
 603   if (!exp)
 604     return false;
 605
 606   if (TREE_CODE (exp) != SSA_NAME)
 607     return false;
 608
 609   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 610 }
 611
 612 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 613    abnormal phi node.  Callback for for_each_index.  */
 614
 615 static bool
 616 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 617                                   void *data ATTRIBUTE_UNUSED)
 618 {
 619   if (TREE_CODE (base) == ARRAY_REF)
 620     {
 621       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 622         return false;
 623       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 624         return false;
 625     }
 626
 627   return !abnormal_ssa_name_p (*index);
 628 }
 629
 630 /* Returns true if EXPR contains a ssa name that occurs in an
 631    abnormal phi node.  */
 632
 633 bool
 634 contains_abnormal_ssa_name_p (tree expr)
 635 {
 636   enum tree_code code;
 637   enum tree_code_class codeclass;
 638
 639   if (!expr)
 640     return false;
 641
 642   code = TREE_CODE (expr);
 643   codeclass = TREE_CODE_CLASS (code);
 644
 645   if (code == SSA_NAME)
 646     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 647
 648   if (code == INTEGER_CST
 649       || is_gimple_min_invariant (expr))
 650     return false;
 651
 652   if (code == ADDR_EXPR)
 653     return !for_each_index (&TREE_OPERAND (expr, 0),
 654                             idx_contains_abnormal_ssa_name_p,
 655                             NULL);
 656
 657   switch (codeclass)
 658     {
 659     case tcc_binary:
 660     case tcc_comparison:
 661       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 662         return true;
 663
 664       /* Fallthru.  */
 665     case tcc_unary:
 666       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 667         return true;
 668
 669       break;
 670
 671     default:
 672       gcc_unreachable ();
 673     }
 674
 675   return false;
 676 }
 677
 678 /*  Returns tree describing number of iterations determined from
 679     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 680
 681 static tree
 682 niter_for_exit (struct ivopts_data *data, edge exit)
 683 {
 684   struct tree_niter_desc desc;
 685   tree niter;
 686   void **slot;
 687
 688   if (!data->niters)
 689     {
 690       data->niters = pointer_map_create ();
 691       slot = NULL;
 692     }
 693   else
 694     slot = pointer_map_contains (data->niters, exit);
 695
 696   if (!slot)
 697     {
 698       /* Try to determine number of iterations.  We must know it
 699          unconditionally (i.e., without possibility of # of iterations
 700          being zero).  Also, we cannot safely work with ssa names that
 701          appear in phi nodes on abnormal edges, so that we do not create
 702          overlapping life ranges for them (PR 27283).  */
 703       if (number_of_iterations_exit (data->current_loop,
 704                                      exit, &desc, true)
 705           && integer_zerop (desc.may_be_zero)
 706           && !contains_abnormal_ssa_name_p (desc.niter))
 707         niter = desc.niter;
 708       else
 709         niter = NULL_TREE;
 710
 711       *pointer_map_insert (data->niters, exit) = niter;
 712     }
 713   else
 714     niter = (tree) *slot;
 715
 716   return niter;
 717 }
 718
 719 /* Returns tree describing number of iterations determined from
 720    single dominating exit of DATA->current_loop, or NULL if something
 721    goes wrong.  */
 722
 723 static tree
 724 niter_for_single_dom_exit (struct ivopts_data *data)
 725 {
 726   edge exit = single_dom_exit (data->current_loop);
 727
 728   if (!exit)
 729     return NULL;
 730
 731   return niter_for_exit (data, exit);
 732 }
 733
 734 /* Initializes data structures used by the iv optimization pass, stored
 735    in DATA.  */
 736
 737 static void
 738 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 739 {
 740   data->version_info_size = 2 * num_ssa_names;
 741   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 742   data->relevant = BITMAP_ALLOC (NULL);
 743   data->important_candidates = BITMAP_ALLOC (NULL);
 744   data->max_inv_id = 0;
 745   data->niters = NULL;
 746   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 747   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 748   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 749 }
 750
 751 /* Returns a memory object to that EXPR points.  In case we are able to
 752    determine that it does not point to any such object, NULL is returned.  */
 753
 754 static tree
 755 determine_base_object (tree expr)
 756 {
 757   enum tree_code code = TREE_CODE (expr);
 758   tree base, obj;
 759
 760   /* If this is a pointer casted to any type, we need to determine
 761      the base object for the pointer; so handle conversions before
 762      throwing away non-pointer expressions.  */
 763   if (TREE_CODE (expr) == NOP_EXPR
 764       || TREE_CODE (expr) == CONVERT_EXPR)
 765     return determine_base_object (TREE_OPERAND (expr, 0));
 766
 767   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 768     return NULL_TREE;
 769
 770   switch (code)
 771     {
 772     case INTEGER_CST:
 773       return NULL_TREE;
 774
 775     case ADDR_EXPR:
 776       obj = TREE_OPERAND (expr, 0);
 777       base = get_base_address (obj);
 778
 779       if (!base)
 780         return expr;
 781
 782       if (TREE_CODE (base) == INDIRECT_REF)
 783         return determine_base_object (TREE_OPERAND (base, 0));
 784
 785       return fold_convert (ptr_type_node,
 786                            build_fold_addr_expr (base));
 787
 788     case POINTER_PLUS_EXPR:
 789       return determine_base_object (TREE_OPERAND (expr, 0));
 790
 791     case PLUS_EXPR:
 792     case MINUS_EXPR:
 793       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 794       gcc_unreachable ();
 795
 796     default:
 797       return fold_convert (ptr_type_node, expr);
 798     }
 799 }
 800
 801 /* Allocates an induction variable with given initial value BASE and step STEP
 802    for loop LOOP.  */
 803
 804 static struct iv *
 805 alloc_iv (tree base, tree step)
 806 {
 807   struct iv *iv = XCNEW (struct iv);
 808   gcc_assert (step != NULL_TREE);
 809
 810   iv->base = base;
 811   iv->base_object = determine_base_object (base);
 812   iv->step = step;
 813   iv->biv_p = false;
 814   iv->have_use_for = false;
 815   iv->use_id = 0;
 816   iv->ssa_name = NULL_TREE;
 817
 818   return iv;
 819 }
 820
 821 /* Sets STEP and BASE for induction variable IV.  */
 822
 823 static void
 824 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 825 {
 826   struct version_info *info = name_info (data, iv);
 827
 828   gcc_assert (!info->iv);
 829
 830   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 831   info->iv = alloc_iv (base, step);
 832   info->iv->ssa_name = iv;
 833 }
 834
 835 /* Finds induction variable declaration for VAR.  */
 836
 837 static struct iv *
 838 get_iv (struct ivopts_data *data, tree var)
 839 {
 840   basic_block bb;
 841   tree type = TREE_TYPE (var);
 842
 843   if (!POINTER_TYPE_P (type)
 844       && !INTEGRAL_TYPE_P (type))
 845     return NULL;
 846
 847   if (!name_info (data, var)->iv)
 848     {
 849       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 850
 851       if (!bb
 852           || !flow_bb_inside_loop_p (data->current_loop, bb))
 853         set_iv (data, var, var, build_int_cst (type, 0));
 854     }
 855
 856   return name_info (data, var)->iv;
 857 }
 858
 859 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 860    not define a simple affine biv with nonzero step.  */
 861
 862 static tree
 863 determine_biv_step (tree phi)
 864 {
 865   struct loop *loop = bb_for_stmt (phi)->loop_father;
 866   tree name = PHI_RESULT (phi);
 867   affine_iv iv;
 868
 869   if (!is_gimple_reg (name))
 870     return NULL_TREE;
 871
 872   if (!simple_iv (loop, phi, name, &iv, true))
 873     return NULL_TREE;
 874
 875   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 876 }
 877
 878 /* Finds basic ivs.  */
 879
 880 static bool
 881 find_bivs (struct ivopts_data *data)
 882 {
 883   tree phi, step, type, base;
 884   bool found = false;
 885   struct loop *loop = data->current_loop;
 886
 887   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 888     {
 889       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 890         continue;
 891
 892       step = determine_biv_step (phi);
 893       if (!step)
 894         continue;
 895
 896       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 897       base = expand_simple_operations (base);
 898       if (contains_abnormal_ssa_name_p (base)
 899           || contains_abnormal_ssa_name_p (step))
 900         continue;
 901
 902       type = TREE_TYPE (PHI_RESULT (phi));
 903       base = fold_convert (type, base);
 904       if (step)
 905         step = fold_convert (type, step);
 906
 907       set_iv (data, PHI_RESULT (phi), base, step);
 908       found = true;
 909     }
 910
 911   return found;
 912 }
 913
 914 /* Marks basic ivs.  */
 915
 916 static void
 917 mark_bivs (struct ivopts_data *data)
 918 {
 919   tree phi, var;
 920   struct iv *iv, *incr_iv;
 921   struct loop *loop = data->current_loop;
 922   basic_block incr_bb;
 923
 924   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 925     {
 926       iv = get_iv (data, PHI_RESULT (phi));
 927       if (!iv)
 928         continue;
 929
 930       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 931       incr_iv = get_iv (data, var);
 932       if (!incr_iv)
 933         continue;
 934
 935       /* If the increment is in the subloop, ignore it.  */
 936       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 937       if (incr_bb->loop_father != data->current_loop
 938           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 939         continue;
 940
 941       iv->biv_p = true;
 942       incr_iv->biv_p = true;
 943     }
 944 }
 945
 946 /* Checks whether STMT defines a linear induction variable and stores its
 947    parameters to IV.  */
 948
 949 static bool
 950 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 951 {
 952   tree lhs;
 953   struct loop *loop = data->current_loop;
 954
 955   iv->base = NULL_TREE;
 956   iv->step = NULL_TREE;
 957
 958   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 959     return false;
 960
 961   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 962   if (TREE_CODE (lhs) != SSA_NAME)
 963     return false;
 964
 965   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
 966     return false;
 967   iv->base = expand_simple_operations (iv->base);
 968
 969   if (contains_abnormal_ssa_name_p (iv->base)
 970       || contains_abnormal_ssa_name_p (iv->step))
 971     return false;
 972
 973   return true;
 974 }
 975
 976 /* Finds general ivs in statement STMT.  */
 977
 978 static void
 979 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
 980 {
 981   affine_iv iv;
 982
 983   if (!find_givs_in_stmt_scev (data, stmt, &iv))
 984     return;
 985
 986   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
 987 }
 988
 989 /* Finds general ivs in basic block BB.  */
 990
 991 static void
 992 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
 993 {
 994   block_stmt_iterator bsi;
 995
 996   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 997     find_givs_in_stmt (data, bsi_stmt (bsi));
 998 }
 999
1000 /* Finds general ivs.  */
1001
1002 static void
1003 find_givs (struct ivopts_data *data)
1004 {
1005   struct loop *loop = data->current_loop;
1006   basic_block *body = get_loop_body_in_dom_order (loop);
1007   unsigned i;
1008
1009   for (i = 0; i < loop->num_nodes; i++)
1010     find_givs_in_bb (data, body[i]);
1011   free (body);
1012 }
1013
1014 /* For each ssa name defined in LOOP determines whether it is an induction
1015    variable and if so, its initial value and step.  */
1016
1017 static bool
1018 find_induction_variables (struct ivopts_data *data)
1019 {
1020   unsigned i;
1021   bitmap_iterator bi;
1022
1023   if (!find_bivs (data))
1024     return false;
1025
1026   find_givs (data);
1027   mark_bivs (data);
1028
1029   if (dump_file && (dump_flags & TDF_DETAILS))
1030     {
1031       tree niter = niter_for_single_dom_exit (data);
1032
1033       if (niter)
1034         {
1035           fprintf (dump_file, "  number of iterations ");
1036           print_generic_expr (dump_file, niter, TDF_SLIM);
1037           fprintf (dump_file, "\n\n");
1038         };
1039
1040       fprintf (dump_file, "Induction variables:\n\n");
1041
1042       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1043         {
1044           if (ver_info (data, i)->iv)
1045             dump_iv (dump_file, ver_info (data, i)->iv);
1046         }
1047     }
1048
1049   return true;
1050 }
1051
1052 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1053
1054 static struct iv_use *
1055 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1056             tree stmt, enum use_type use_type)
1057 {
1058   struct iv_use *use = XCNEW (struct iv_use);
1059
1060   use->id = n_iv_uses (data);
1061   use->type = use_type;
1062   use->iv = iv;
1063   use->stmt = stmt;
1064   use->op_p = use_p;
1065   use->related_cands = BITMAP_ALLOC (NULL);
1066
1067   /* To avoid showing ssa name in the dumps, if it was not reset by the
1068      caller.  */
1069   iv->ssa_name = NULL_TREE;
1070
1071   if (dump_file && (dump_flags & TDF_DETAILS))
1072     dump_use (dump_file, use);
1073
1074   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1075
1076   return use;
1077 }
1078
1079 /* Checks whether OP is a loop-level invariant and if so, records it.
1080    NONLINEAR_USE is true if the invariant is used in a way we do not
1081    handle specially.  */
1082
1083 static void
1084 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1085 {
1086   basic_block bb;
1087   struct version_info *info;
1088
1089   if (TREE_CODE (op) != SSA_NAME
1090       || !is_gimple_reg (op))
1091     return;
1092
1093   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1094   if (bb
1095       && flow_bb_inside_loop_p (data->current_loop, bb))
1096     return;
1097
1098   info = name_info (data, op);
1099   info->name = op;
1100   info->has_nonlin_use |= nonlinear_use;
1101   if (!info->inv_id)
1102     info->inv_id = ++data->max_inv_id;
1103   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1104 }
1105
1106 /* Checks whether the use OP is interesting and if so, records it.  */
1107
1108 static struct iv_use *
1109 find_interesting_uses_op (struct ivopts_data *data, tree op)
1110 {
1111   struct iv *iv;
1112   struct iv *civ;
1113   tree stmt;
1114   struct iv_use *use;
1115
1116   if (TREE_CODE (op) != SSA_NAME)
1117     return NULL;
1118
1119   iv = get_iv (data, op);
1120   if (!iv)
1121     return NULL;
1122
1123   if (iv->have_use_for)
1124     {
1125       use = iv_use (data, iv->use_id);
1126
1127       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1128       return use;
1129     }
1130
1131   if (integer_zerop (iv->step))
1132     {
1133       record_invariant (data, op, true);
1134       return NULL;
1135     }
1136   iv->have_use_for = true;
1137
1138   civ = XNEW (struct iv);
1139   *civ = *iv;
1140
1141   stmt = SSA_NAME_DEF_STMT (op);
1142   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1143               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1144
1145   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1146   iv->use_id = use->id;
1147
1148   return use;
1149 }
1150
1151 /* Given a condition *COND_P, checks whether it is a compare of an induction
1152    variable and an invariant.  If this is the case, CONTROL_VAR is set
1153    to location of the iv, BOUND to the location of the invariant,
1154    IV_VAR and IV_BOUND are set to the corresponding induction variable
1155    descriptions, and true is returned.  If this is not the case,
1156    CONTROL_VAR and BOUND are set to the arguments of the condition and
1157    false is returned.  */
1158
1159 static bool
1160 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1161                        tree **control_var, tree **bound,
1162                        struct iv **iv_var, struct iv **iv_bound)
1163 {
1164   /* The nodes returned when COND has just one operand.  Note that you should
1165      not modify anything in BOUND or IV_BOUND because of this.  */
1166   static struct iv const_iv;
1167   static tree zero;
1168   tree cond = *cond_p;
1169   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1170   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1171   bool ret = false;
1172
1173   zero = integer_zero_node;
1174   const_iv.step = integer_zero_node;
1175
1176   if (TREE_CODE (cond) == SSA_NAME)
1177     {
1178       op0 = cond_p;
1179       iv0 = get_iv (data, cond);
1180       ret = (iv0 && !integer_zerop (iv0->step));
1181       goto end;
1182     }
1183
1184   if (!COMPARISON_CLASS_P (cond))
1185     {
1186       op0 = cond_p;
1187       goto end;
1188     }
1189
1190   op0 = &TREE_OPERAND (cond, 0);
1191   op1 = &TREE_OPERAND (cond, 1);
1192   if (TREE_CODE (*op0) == SSA_NAME)
1193     iv0 = get_iv (data, *op0);
1194   if (TREE_CODE (*op1) == SSA_NAME)
1195     iv1 = get_iv (data, *op1);
1196
1197   /* Exactly one of the compared values must be an iv, and the other one must
1198      be an invariant.  */
1199   if (!iv0 || !iv1)
1200     goto end;
1201
1202   if (integer_zerop (iv0->step))
1203     {
1204       /* Control variable may be on the other side.  */
1205       tmp_op = op0; op0 = op1; op1 = tmp_op;
1206       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1207     }
1208   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1209
1210 end:
1211   if (control_var)
1212     *control_var = op0;;
1213   if (iv_var)
1214     *iv_var = iv0;;
1215   if (bound)
1216     *bound = op1;
1217   if (iv_bound)
1218     *iv_bound = iv1;
1219
1220   return ret;
1221 }
1222
1223 /* Checks whether the condition *COND_P in STMT is interesting
1224    and if so, records it.  */
1225
1226 static void
1227 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1228 {
1229   tree *var_p, *bound_p;
1230   struct iv *var_iv, *civ;
1231
1232   if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1233     {
1234       find_interesting_uses_op (data, *var_p);
1235       find_interesting_uses_op (data, *bound_p);
1236       return;
1237     }
1238
1239   civ = XNEW (struct iv);
1240   *civ = *var_iv;
1241   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1242 }
1243
1244 /* Returns true if expression EXPR is obviously invariant in LOOP,
1245    i.e. if all its operands are defined outside of the LOOP.  */
1246
1247 bool
1248 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1249 {
1250   basic_block def_bb;
1251   unsigned i, len;
1252
1253   if (is_gimple_min_invariant (expr))
1254     return true;
1255
1256   if (TREE_CODE (expr) == SSA_NAME)
1257     {
1258       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1259       if (def_bb
1260           && flow_bb_inside_loop_p (loop, def_bb))
1261         return false;
1262
1263       return true;
1264     }
1265
1266   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1267     return false;
1268
1269   len = TREE_OPERAND_LENGTH (expr);
1270   for (i = 0; i < len; i++)
1271     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1272       return false;
1273
1274   return true;
1275 }
1276
1277 /* Cumulates the steps of indices into DATA and replaces their values with the
1278    initial ones.  Returns false when the value of the index cannot be determined.
1279    Callback for for_each_index.  */
1280
1281 struct ifs_ivopts_data
1282 {
1283   struct ivopts_data *ivopts_data;
1284   tree stmt;
1285   tree step;
1286 };
1287
1288 static bool
1289 idx_find_step (tree base, tree *idx, void *data)
1290 {
1291   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1292   struct iv *iv;
1293   tree step, iv_base, iv_step, lbound, off;
1294   struct loop *loop = dta->ivopts_data->current_loop;
1295
1296   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1297       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1298     return false;
1299
1300   /* If base is a component ref, require that the offset of the reference
1301      be invariant.  */
1302   if (TREE_CODE (base) == COMPONENT_REF)
1303     {
1304       off = component_ref_field_offset (base);
1305       return expr_invariant_in_loop_p (loop, off);
1306     }
1307
1308   /* If base is array, first check whether we will be able to move the
1309      reference out of the loop (in order to take its address in strength
1310      reduction).  In order for this to work we need both lower bound
1311      and step to be loop invariants.  */
1312   if (TREE_CODE (base) == ARRAY_REF)
1313     {
1314       step = array_ref_element_size (base);
1315       lbound = array_ref_low_bound (base);
1316
1317       if (!expr_invariant_in_loop_p (loop, step)
1318           || !expr_invariant_in_loop_p (loop, lbound))
1319         return false;
1320     }
1321
1322   if (TREE_CODE (*idx) != SSA_NAME)
1323     return true;
1324
1325   iv = get_iv (dta->ivopts_data, *idx);
1326   if (!iv)
1327     return false;
1328
1329   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1330           *&x[0], which is not folded and does not trigger the
1331           ARRAY_REF path below.  */
1332   *idx = iv->base;
1333
1334   if (integer_zerop (iv->step))
1335     return true;
1336
1337   if (TREE_CODE (base) == ARRAY_REF)
1338     {
1339       step = array_ref_element_size (base);
1340
1341       /* We only handle addresses whose step is an integer constant.  */
1342       if (TREE_CODE (step) != INTEGER_CST)
1343         return false;
1344     }
1345   else
1346     /* The step for pointer arithmetics already is 1 byte.  */
1347     step = build_int_cst (sizetype, 1);
1348
1349   iv_base = iv->base;
1350   iv_step = iv->step;
1351   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1352                             sizetype, &iv_base, &iv_step, dta->stmt,
1353                             false))
1354     {
1355       /* The index might wrap.  */
1356       return false;
1357     }
1358
1359   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1360   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1361
1362   return true;
1363 }
1364
1365 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1366    object is passed to it in DATA.  */
1367
1368 static bool
1369 idx_record_use (tree base, tree *idx,
1370                 void *vdata)
1371 {
1372   struct ivopts_data *data = (struct ivopts_data *) vdata;
1373   find_interesting_uses_op (data, *idx);
1374   if (TREE_CODE (base) == ARRAY_REF)
1375     {
1376       find_interesting_uses_op (data, array_ref_element_size (base));
1377       find_interesting_uses_op (data, array_ref_low_bound (base));
1378     }
1379   return true;
1380 }
1381
1382 /* Returns true if memory reference REF may be unaligned.  */
1383
1384 static bool
1385 may_be_unaligned_p (tree ref)
1386 {
1387   tree base;
1388   tree base_type;
1389   HOST_WIDE_INT bitsize;
1390   HOST_WIDE_INT bitpos;
1391   tree toffset;
1392   enum machine_mode mode;
1393   int unsignedp, volatilep;
1394   unsigned base_align;
1395
1396   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1397      thus they are not misaligned.  */
1398   if (TREE_CODE (ref) == TARGET_MEM_REF)
1399     return false;
1400
1401   /* The test below is basically copy of what expr.c:normal_inner_ref
1402      does to check whether the object must be loaded by parts when
1403      STRICT_ALIGNMENT is true.  */
1404   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1405                               &unsignedp, &volatilep, true);
1406   base_type = TREE_TYPE (base);
1407   base_align = TYPE_ALIGN (base_type);
1408
1409   if (mode != BLKmode
1410       && (base_align < GET_MODE_ALIGNMENT (mode)
1411           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1412           || bitpos % BITS_PER_UNIT != 0))
1413     return true;
1414
1415   return false;
1416 }
1417
1418 /* Return true if EXPR may be non-addressable.   */
1419
1420 static bool
1421 may_be_nonaddressable_p (tree expr)
1422 {
1423   switch (TREE_CODE (expr))
1424     {
1425     case COMPONENT_REF:
1426       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1427              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1428
1429     case ARRAY_REF:
1430     case ARRAY_RANGE_REF:
1431       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1432
1433     case VIEW_CONVERT_EXPR:
1434       /* This kind of view-conversions may wrap non-addressable objects
1435          and make them look addressable.  After some processing the
1436          non-addressability may be uncovered again, causing ADDR_EXPRs
1437          of inappropriate objects to be built.  */
1438       return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1439              && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1440
1441     default:
1442       break;
1443     }
1444
1445   return false;
1446 }
1447
1448 /* Finds addresses in *OP_P inside STMT.  */
1449
1450 static void
1451 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1452 {
1453   tree base = *op_p, step = build_int_cst (sizetype, 0);
1454   struct iv *civ;
1455   struct ifs_ivopts_data ifs_ivopts_data;
1456
1457   /* Do not play with volatile memory references.  A bit too conservative,
1458      perhaps, but safe.  */
1459   if (stmt_ann (stmt)->has_volatile_ops)
1460     goto fail;
1461
1462   /* Ignore bitfields for now.  Not really something terribly complicated
1463      to handle.  TODO.  */
1464   if (TREE_CODE (base) == BIT_FIELD_REF)
1465     goto fail;
1466
1467   if (may_be_nonaddressable_p (base))
1468     goto fail;
1469
1470   if (STRICT_ALIGNMENT
1471       && may_be_unaligned_p (base))
1472     goto fail;
1473
1474   base = unshare_expr (base);
1475
1476   if (TREE_CODE (base) == TARGET_MEM_REF)
1477     {
1478       tree type = build_pointer_type (TREE_TYPE (base));
1479       tree astep;
1480
1481       if (TMR_BASE (base)
1482           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1483         {
1484           civ = get_iv (data, TMR_BASE (base));
1485           if (!civ)
1486             goto fail;
1487
1488           TMR_BASE (base) = civ->base;
1489           step = civ->step;
1490         }
1491       if (TMR_INDEX (base)
1492           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1493         {
1494           civ = get_iv (data, TMR_INDEX (base));
1495           if (!civ)
1496             goto fail;
1497
1498           TMR_INDEX (base) = civ->base;
1499           astep = civ->step;
1500
1501           if (astep)
1502             {
1503               if (TMR_STEP (base))
1504                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1505
1506               step = fold_build2 (PLUS_EXPR, type, step, astep);
1507             }
1508         }
1509
1510       if (integer_zerop (step))
1511         goto fail;
1512       base = tree_mem_ref_addr (type, base);
1513     }
1514   else
1515     {
1516       ifs_ivopts_data.ivopts_data = data;
1517       ifs_ivopts_data.stmt = stmt;
1518       ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1519       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1520           || integer_zerop (ifs_ivopts_data.step))
1521         goto fail;
1522       step = ifs_ivopts_data.step;
1523
1524       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1525       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1526
1527       base = build_fold_addr_expr (base);
1528
1529       /* Substituting bases of IVs into the base expression might
1530          have caused folding opportunities.  */
1531       if (TREE_CODE (base) == ADDR_EXPR)
1532         {
1533           tree *ref = &TREE_OPERAND (base, 0);
1534           while (handled_component_p (*ref))
1535             ref = &TREE_OPERAND (*ref, 0);
1536           if (TREE_CODE (*ref) == INDIRECT_REF)
1537             *ref = fold_indirect_ref (*ref);
1538         }
1539     }
1540
1541   civ = alloc_iv (base, step);
1542   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1543   return;
1544
1545 fail:
1546   for_each_index (op_p, idx_record_use, data);
1547 }
1548
1549 /* Finds and records invariants used in STMT.  */
1550
1551 static void
1552 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1553 {
1554   ssa_op_iter iter;
1555   use_operand_p use_p;
1556   tree op;
1557
1558   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1559     {
1560       op = USE_FROM_PTR (use_p);
1561       record_invariant (data, op, false);
1562     }
1563 }
1564
1565 /* Finds interesting uses of induction variables in the statement STMT.  */
1566
1567 static void
1568 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1569 {
1570   struct iv *iv;
1571   tree op, lhs, rhs;
1572   ssa_op_iter iter;
1573   use_operand_p use_p;
1574
1575   find_invariants_stmt (data, stmt);
1576
1577   if (TREE_CODE (stmt) == COND_EXPR)
1578     {
1579       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1580       return;
1581     }
1582
1583   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1584     {
1585       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1586       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1587
1588       if (TREE_CODE (lhs) == SSA_NAME)
1589         {
1590           /* If the statement defines an induction variable, the uses are not
1591              interesting by themselves.  */
1592
1593           iv = get_iv (data, lhs);
1594
1595           if (iv && !integer_zerop (iv->step))
1596             return;
1597         }
1598
1599       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1600         {
1601         case tcc_comparison:
1602           find_interesting_uses_cond (data, stmt,
1603                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1604           return;
1605
1606         case tcc_reference:
1607           find_interesting_uses_address (data, stmt,
1608                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1609           if (REFERENCE_CLASS_P (lhs))
1610             find_interesting_uses_address (data, stmt,
1611                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1612           return;
1613
1614         default: ;
1615         }
1616
1617       if (REFERENCE_CLASS_P (lhs)
1618           && is_gimple_val (rhs))
1619         {
1620           find_interesting_uses_address (data, stmt,
1621                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1622           find_interesting_uses_op (data, rhs);
1623           return;
1624         }
1625
1626       /* TODO -- we should also handle address uses of type
1627
1628          memory = call (whatever);
1629
1630          and
1631
1632          call (memory).  */
1633     }
1634
1635   if (TREE_CODE (stmt) == PHI_NODE
1636       && bb_for_stmt (stmt) == data->current_loop->header)
1637     {
1638       lhs = PHI_RESULT (stmt);
1639       iv = get_iv (data, lhs);
1640
1641       if (iv && !integer_zerop (iv->step))
1642         return;
1643     }
1644
1645   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1646     {
1647       op = USE_FROM_PTR (use_p);
1648
1649       if (TREE_CODE (op) != SSA_NAME)
1650         continue;
1651
1652       iv = get_iv (data, op);
1653       if (!iv)
1654         continue;
1655
1656       find_interesting_uses_op (data, op);
1657     }
1658 }
1659
1660 /* Finds interesting uses of induction variables outside of loops
1661    on loop exit edge EXIT.  */
1662
1663 static void
1664 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1665 {
1666   tree phi, def;
1667
1668   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1669     {
1670       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1671       if (is_gimple_reg (def))
1672         find_interesting_uses_op (data, def);
1673     }
1674 }
1675
1676 /* Finds uses of the induction variables that are interesting.  */
1677
1678 static void
1679 find_interesting_uses (struct ivopts_data *data)
1680 {
1681   basic_block bb;
1682   block_stmt_iterator bsi;
1683   tree phi;
1684   basic_block *body = get_loop_body (data->current_loop);
1685   unsigned i;
1686   struct version_info *info;
1687   edge e;
1688
1689   if (dump_file && (dump_flags & TDF_DETAILS))
1690     fprintf (dump_file, "Uses:\n\n");
1691
1692   for (i = 0; i < data->current_loop->num_nodes; i++)
1693     {
1694       edge_iterator ei;
1695       bb = body[i];
1696
1697       FOR_EACH_EDGE (e, ei, bb->succs)
1698         if (e->dest != EXIT_BLOCK_PTR
1699             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1700           find_interesting_uses_outside (data, e);
1701
1702       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1703         find_interesting_uses_stmt (data, phi);
1704       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1705         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1706     }
1707
1708   if (dump_file && (dump_flags & TDF_DETAILS))
1709     {
1710       bitmap_iterator bi;
1711
1712       fprintf (dump_file, "\n");
1713
1714       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1715         {
1716           info = ver_info (data, i);
1717           if (info->inv_id)
1718             {
1719               fprintf (dump_file, "  ");
1720               print_generic_expr (dump_file, info->name, TDF_SLIM);
1721               fprintf (dump_file, " is invariant (%d)%s\n",
1722                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1723             }
1724         }
1725
1726       fprintf (dump_file, "\n");
1727     }
1728
1729   free (body);
1730 }
1731
1732 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1733    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1734    we are at the top-level of the processed address.  */
1735
1736 static tree
1737 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1738                 unsigned HOST_WIDE_INT *offset)
1739 {
1740   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1741   enum tree_code code;
1742   tree type, orig_type = TREE_TYPE (expr);
1743   unsigned HOST_WIDE_INT off0, off1, st;
1744   tree orig_expr = expr;
1745
1746   STRIP_NOPS (expr);
1747
1748   type = TREE_TYPE (expr);
1749   code = TREE_CODE (expr);
1750   *offset = 0;
1751
1752   switch (code)
1753     {
1754     case INTEGER_CST:
1755       if (!cst_and_fits_in_hwi (expr)
1756           || integer_zerop (expr))
1757         return orig_expr;
1758
1759       *offset = int_cst_value (expr);
1760       return build_int_cst (orig_type, 0);
1761
1762     case PLUS_EXPR:
1763     case MINUS_EXPR:
1764       op0 = TREE_OPERAND (expr, 0);
1765       op1 = TREE_OPERAND (expr, 1);
1766
1767       op0 = strip_offset_1 (op0, false, false, &off0);
1768       op1 = strip_offset_1 (op1, false, false, &off1);
1769
1770       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1771       if (op0 == TREE_OPERAND (expr, 0)
1772           && op1 == TREE_OPERAND (expr, 1))
1773         return orig_expr;
1774
1775       if (integer_zerop (op1))
1776         expr = op0;
1777       else if (integer_zerop (op0))
1778         {
1779           if (code == PLUS_EXPR)
1780             expr = op1;
1781           else
1782             expr = fold_build1 (NEGATE_EXPR, type, op1);
1783         }
1784       else
1785         expr = fold_build2 (code, type, op0, op1);
1786
1787       return fold_convert (orig_type, expr);
1788
1789     case ARRAY_REF:
1790       if (!inside_addr)
1791         return orig_expr;
1792
1793       step = array_ref_element_size (expr);
1794       if (!cst_and_fits_in_hwi (step))
1795         break;
1796
1797       st = int_cst_value (step);
1798       op1 = TREE_OPERAND (expr, 1);
1799       op1 = strip_offset_1 (op1, false, false, &off1);
1800       *offset = off1 * st;
1801
1802       if (top_compref
1803           && integer_zerop (op1))
1804         {
1805           /* Strip the component reference completely.  */
1806           op0 = TREE_OPERAND (expr, 0);
1807           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1808           *offset += off0;
1809           return op0;
1810         }
1811       break;
1812
1813     case COMPONENT_REF:
1814       if (!inside_addr)
1815         return orig_expr;
1816
1817       tmp = component_ref_field_offset (expr);
1818       if (top_compref
1819           && cst_and_fits_in_hwi (tmp))
1820         {
1821           /* Strip the component reference completely.  */
1822           op0 = TREE_OPERAND (expr, 0);
1823           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1824           *offset = off0 + int_cst_value (tmp);
1825           return op0;
1826         }
1827       break;
1828
1829     case ADDR_EXPR:
1830       op0 = TREE_OPERAND (expr, 0);
1831       op0 = strip_offset_1 (op0, true, true, &off0);
1832       *offset += off0;
1833
1834       if (op0 == TREE_OPERAND (expr, 0))
1835         return orig_expr;
1836
1837       expr = build_fold_addr_expr (op0);
1838       return fold_convert (orig_type, expr);
1839
1840     case INDIRECT_REF:
1841       inside_addr = false;
1842       break;
1843
1844     default:
1845       return orig_expr;
1846     }
1847
1848   /* Default handling of expressions for that we want to recurse into
1849      the first operand.  */
1850   op0 = TREE_OPERAND (expr, 0);
1851   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1852   *offset += off0;
1853
1854   if (op0 == TREE_OPERAND (expr, 0)
1855       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1856     return orig_expr;
1857
1858   expr = copy_node (expr);
1859   TREE_OPERAND (expr, 0) = op0;
1860   if (op1)
1861     TREE_OPERAND (expr, 1) = op1;
1862
1863   /* Inside address, we might strip the top level component references,
1864      thus changing type of the expression.  Handling of ADDR_EXPR
1865      will fix that.  */
1866   expr = fold_convert (orig_type, expr);
1867
1868   return expr;
1869 }
1870
1871 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1872
1873 static tree
1874 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1875 {
1876   return strip_offset_1 (expr, false, false, offset);
1877 }
1878
1879 /* Returns variant of TYPE that can be used as base for different uses.
1880    We return unsigned type with the same precision, which avoids problems
1881    with overflows.  */
1882
1883 static tree
1884 generic_type_for (tree type)
1885 {
1886   if (POINTER_TYPE_P (type))
1887     return unsigned_type_for (type);
1888
1889   if (TYPE_UNSIGNED (type))
1890     return type;
1891
1892   return unsigned_type_for (type);
1893 }
1894
1895 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1896    the bitmap to that we should store it.  */
1897
1898 static struct ivopts_data *fd_ivopts_data;
1899 static tree
1900 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1901 {
1902   bitmap *depends_on = (bitmap *) data;
1903   struct version_info *info;
1904
1905   if (TREE_CODE (*expr_p) != SSA_NAME)
1906     return NULL_TREE;
1907   info = name_info (fd_ivopts_data, *expr_p);
1908
1909   if (!info->inv_id || info->has_nonlin_use)
1910     return NULL_TREE;
1911
1912   if (!*depends_on)
1913     *depends_on = BITMAP_ALLOC (NULL);
1914   bitmap_set_bit (*depends_on, info->inv_id);
1915
1916   return NULL_TREE;
1917 }
1918
1919 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1920    position to POS.  If USE is not NULL, the candidate is set as related to
1921    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1922    replacement of the final value of the iv by a direct computation.  */
1923
1924 static struct iv_cand *
1925 add_candidate_1 (struct ivopts_data *data,
1926                  tree base, tree step, bool important, enum iv_position pos,
1927                  struct iv_use *use, tree incremented_at)
1928 {
1929   unsigned i;
1930   struct iv_cand *cand = NULL;
1931   tree type, orig_type;
1932
1933   if (base)
1934     {
1935       orig_type = TREE_TYPE (base);
1936       type = generic_type_for (orig_type);
1937       if (type != orig_type)
1938         {
1939           base = fold_convert (type, base);
1940           step = fold_convert (type, step);
1941         }
1942     }
1943
1944   for (i = 0; i < n_iv_cands (data); i++)
1945     {
1946       cand = iv_cand (data, i);
1947
1948       if (cand->pos != pos)
1949         continue;
1950
1951       if (cand->incremented_at != incremented_at)
1952         continue;
1953
1954       if (!cand->iv)
1955         {
1956           if (!base && !step)
1957             break;
1958
1959           continue;
1960         }
1961
1962       if (!base && !step)
1963         continue;
1964
1965       if (operand_equal_p (base, cand->iv->base, 0)
1966           && operand_equal_p (step, cand->iv->step, 0))
1967         break;
1968     }
1969
1970   if (i == n_iv_cands (data))
1971     {
1972       cand = XCNEW (struct iv_cand);
1973       cand->id = i;
1974
1975       if (!base && !step)
1976         cand->iv = NULL;
1977       else
1978         cand->iv = alloc_iv (base, step);
1979
1980       cand->pos = pos;
1981       if (pos != IP_ORIGINAL && cand->iv)
1982         {
1983           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1984           cand->var_after = cand->var_before;
1985         }
1986       cand->important = important;
1987       cand->incremented_at = incremented_at;
1988       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
1989
1990       if (step
1991           && TREE_CODE (step) != INTEGER_CST)
1992         {
1993           fd_ivopts_data = data;
1994           walk_tree (&step, find_depends, &cand->depends_on, NULL);
1995         }
1996
1997       if (dump_file && (dump_flags & TDF_DETAILS))
1998         dump_cand (dump_file, cand);
1999     }
2000
2001   if (important && !cand->important)
2002     {
2003       cand->important = true;
2004       if (dump_file && (dump_flags & TDF_DETAILS))
2005         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2006     }
2007
2008   if (use)
2009     {
2010       bitmap_set_bit (use->related_cands, i);
2011       if (dump_file && (dump_flags & TDF_DETAILS))
2012         fprintf (dump_file, "Candidate %d is related to use %d\n",
2013                  cand->id, use->id);
2014     }
2015
2016   return cand;
2017 }
2018
2019 /* Returns true if incrementing the induction variable at the end of the LOOP
2020    is allowed.
2021
2022    The purpose is to avoid splitting latch edge with a biv increment, thus
2023    creating a jump, possibly confusing other optimization passes and leaving
2024    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2025    is not available (so we do not have a better alternative), or if the latch
2026    edge is already nonempty.  */
2027
2028 static bool
2029 allow_ip_end_pos_p (struct loop *loop)
2030 {
2031   if (!ip_normal_pos (loop))
2032     return true;
2033
2034   if (!empty_block_p (ip_end_pos (loop)))
2035     return true;
2036
2037   return false;
2038 }
2039
2040 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2041    position to POS.  If USE is not NULL, the candidate is set as related to
2042    it.  The candidate computation is scheduled on all available positions.  */
2043
2044 static void
2045 add_candidate (struct ivopts_data *data,
2046                tree base, tree step, bool important, struct iv_use *use)
2047 {
2048   if (ip_normal_pos (data->current_loop))
2049     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2050   if (ip_end_pos (data->current_loop)
2051       && allow_ip_end_pos_p (data->current_loop))
2052     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2053 }
2054
2055 /* Add a standard "0 + 1 * iteration" iv candidate for a
2056    type with SIZE bits.  */
2057
2058 static void
2059 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2060                                      unsigned int size)
2061 {
2062   tree type = lang_hooks.types.type_for_size (size, true);
2063   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2064                  true, NULL);
2065 }
2066
2067 /* Adds standard iv candidates.  */
2068
2069 static void
2070 add_standard_iv_candidates (struct ivopts_data *data)
2071 {
2072   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2073
2074   /* The same for a double-integer type if it is still fast enough.  */
2075   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2076     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2077 }
2078
2079
2080 /* Adds candidates bases on the old induction variable IV.  */
2081
2082 static void
2083 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2084 {
2085   tree phi, def;
2086   struct iv_cand *cand;
2087
2088   add_candidate (data, iv->base, iv->step, true, NULL);
2089
2090   /* The same, but with initial value zero.  */
2091   add_candidate (data,
2092                  build_int_cst (TREE_TYPE (iv->base), 0),
2093                  iv->step, true, NULL);
2094
2095   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2096   if (TREE_CODE (phi) == PHI_NODE)
2097     {
2098       /* Additionally record the possibility of leaving the original iv
2099          untouched.  */
2100       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2101       cand = add_candidate_1 (data,
2102                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2103                               SSA_NAME_DEF_STMT (def));
2104       cand->var_before = iv->ssa_name;
2105       cand->var_after = def;
2106     }
2107 }
2108
2109 /* Adds candidates based on the old induction variables.  */
2110
2111 static void
2112 add_old_ivs_candidates (struct ivopts_data *data)
2113 {
2114   unsigned i;
2115   struct iv *iv;
2116   bitmap_iterator bi;
2117
2118   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2119     {
2120       iv = ver_info (data, i)->iv;
2121       if (iv && iv->biv_p && !integer_zerop (iv->step))
2122         add_old_iv_candidates (data, iv);
2123     }
2124 }
2125
2126 /* Adds candidates based on the value of the induction variable IV and USE.  */
2127
2128 static void
2129 add_iv_value_candidates (struct ivopts_data *data,
2130                          struct iv *iv, struct iv_use *use)
2131 {
2132   unsigned HOST_WIDE_INT offset;
2133   tree base;
2134
2135   add_candidate (data, iv->base, iv->step, false, use);
2136
2137   /* The same, but with initial value zero.  Make such variable important,
2138      since it is generic enough so that possibly many uses may be based
2139      on it.  */
2140   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2141                  iv->step, true, use);
2142
2143   /* Third, try removing the constant offset.  */
2144   base = strip_offset (iv->base, &offset);
2145   if (offset)
2146     add_candidate (data, base, iv->step, false, use);
2147 }
2148
2149 /* Adds candidates based on the uses.  */
2150
2151 static void
2152 add_derived_ivs_candidates (struct ivopts_data *data)
2153 {
2154   unsigned i;
2155
2156   for (i = 0; i < n_iv_uses (data); i++)
2157     {
2158       struct iv_use *use = iv_use (data, i);
2159
2160       if (!use)
2161         continue;
2162
2163       switch (use->type)
2164         {
2165         case USE_NONLINEAR_EXPR:
2166         case USE_COMPARE:
2167         case USE_ADDRESS:
2168           /* Just add the ivs based on the value of the iv used here.  */
2169           add_iv_value_candidates (data, use->iv, use);
2170           break;
2171
2172         default:
2173           gcc_unreachable ();
2174         }
2175     }
2176 }
2177
2178 /* Record important candidates and add them to related_cands bitmaps
2179    if needed.  */
2180
2181 static void
2182 record_important_candidates (struct ivopts_data *data)
2183 {
2184   unsigned i;
2185   struct iv_use *use;
2186
2187   for (i = 0; i < n_iv_cands (data); i++)
2188     {
2189       struct iv_cand *cand = iv_cand (data, i);
2190
2191       if (cand->important)
2192         bitmap_set_bit (data->important_candidates, i);
2193     }
2194
2195   data->consider_all_candidates = (n_iv_cands (data)
2196                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2197
2198   if (data->consider_all_candidates)
2199     {
2200       /* We will not need "related_cands" bitmaps in this case,
2201          so release them to decrease peak memory consumption.  */
2202       for (i = 0; i < n_iv_uses (data); i++)
2203         {
2204           use = iv_use (data, i);
2205           BITMAP_FREE (use->related_cands);
2206         }
2207     }
2208   else
2209     {
2210       /* Add important candidates to the related_cands bitmaps.  */
2211       for (i = 0; i < n_iv_uses (data); i++)
2212         bitmap_ior_into (iv_use (data, i)->related_cands,
2213                          data->important_candidates);
2214     }
2215 }
2216
2217 /* Finds the candidates for the induction variables.  */
2218
2219 static void
2220 find_iv_candidates (struct ivopts_data *data)
2221 {
2222   /* Add commonly used ivs.  */
2223   add_standard_iv_candidates (data);
2224
2225   /* Add old induction variables.  */
2226   add_old_ivs_candidates (data);
2227
2228   /* Add induction variables derived from uses.  */
2229   add_derived_ivs_candidates (data);
2230
2231   /* Record the important candidates.  */
2232   record_important_candidates (data);
2233 }
2234
2235 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2236    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2237    we allocate a simple list to every use.  */
2238
2239 static void
2240 alloc_use_cost_map (struct ivopts_data *data)
2241 {
2242   unsigned i, size, s, j;
2243
2244   for (i = 0; i < n_iv_uses (data); i++)
2245     {
2246       struct iv_use *use = iv_use (data, i);
2247       bitmap_iterator bi;
2248
2249       if (data->consider_all_candidates)
2250         size = n_iv_cands (data);
2251       else
2252         {
2253           s = 0;
2254           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2255             {
2256               s++;
2257             }
2258
2259           /* Round up to the power of two, so that moduling by it is fast.  */
2260           for (size = 1; size < s; size <<= 1)
2261             continue;
2262         }
2263
2264       use->n_map_members = size;
2265       use->cost_map = XCNEWVEC (struct cost_pair, size);
2266     }
2267 }
2268
2269 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2270    on invariants DEPENDS_ON and that the value used in expressing it
2271    is VALUE.*/
2272
2273 static void
2274 set_use_iv_cost (struct ivopts_data *data,
2275                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2276                  bitmap depends_on, tree value)
2277 {
2278   unsigned i, s;
2279
2280   if (cost == INFTY)
2281     {
2282       BITMAP_FREE (depends_on);
2283       return;
2284     }
2285
2286   if (data->consider_all_candidates)
2287     {
2288       use->cost_map[cand->id].cand = cand;
2289       use->cost_map[cand->id].cost = cost;
2290       use->cost_map[cand->id].depends_on = depends_on;
2291       use->cost_map[cand->id].value = value;
2292       return;
2293     }
2294
2295   /* n_map_members is a power of two, so this computes modulo.  */
2296   s = cand->id & (use->n_map_members - 1);
2297   for (i = s; i < use->n_map_members; i++)
2298     if (!use->cost_map[i].cand)
2299       goto found;
2300   for (i = 0; i < s; i++)
2301     if (!use->cost_map[i].cand)
2302       goto found;
2303
2304   gcc_unreachable ();
2305
2306 found:
2307   use->cost_map[i].cand = cand;
2308   use->cost_map[i].cost = cost;
2309   use->cost_map[i].depends_on = depends_on;
2310   use->cost_map[i].value = value;
2311 }
2312
2313 /* Gets cost of (USE, CANDIDATE) pair.  */
2314
2315 static struct cost_pair *
2316 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2317                  struct iv_cand *cand)
2318 {
2319   unsigned i, s;
2320   struct cost_pair *ret;
2321
2322   if (!cand)
2323     return NULL;
2324
2325   if (data->consider_all_candidates)
2326     {
2327       ret = use->cost_map + cand->id;
2328       if (!ret->cand)
2329         return NULL;
2330
2331       return ret;
2332     }
2333
2334   /* n_map_members is a power of two, so this computes modulo.  */
2335   s = cand->id & (use->n_map_members - 1);
2336   for (i = s; i < use->n_map_members; i++)
2337     if (use->cost_map[i].cand == cand)
2338       return use->cost_map + i;
2339
2340   for (i = 0; i < s; i++)
2341     if (use->cost_map[i].cand == cand)
2342       return use->cost_map + i;
2343
2344   return NULL;
2345 }
2346
2347 /* Returns estimate on cost of computing SEQ.  */
2348
2349 static unsigned
2350 seq_cost (rtx seq)
2351 {
2352   unsigned cost = 0;
2353   rtx set;
2354
2355   for (; seq; seq = NEXT_INSN (seq))
2356     {
2357       set = single_set (seq);
2358       if (set)
2359         cost += rtx_cost (set, SET);
2360       else
2361         cost++;
2362     }
2363
2364   return cost;
2365 }
2366
2367 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2368 static rtx
2369 produce_memory_decl_rtl (tree obj, int *regno)
2370 {
2371   rtx x;
2372
2373   gcc_assert (obj);
2374   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2375     {
2376       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2377       x = gen_rtx_SYMBOL_REF (Pmode, name);
2378       SET_SYMBOL_REF_DECL (x, obj);
2379       x = gen_rtx_MEM (DECL_MODE (obj), x);
2380       targetm.encode_section_info (obj, x, true);
2381     }
2382   else
2383     {
2384       x = gen_raw_REG (Pmode, (*regno)++);
2385       x = gen_rtx_MEM (DECL_MODE (obj), x);
2386     }
2387
2388   return x;
2389 }
2390
2391 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2392    walk_tree.  DATA contains the actual fake register number.  */
2393
2394 static tree
2395 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2396 {
2397   tree obj = NULL_TREE;
2398   rtx x = NULL_RTX;
2399   int *regno = (int *) data;
2400
2401   switch (TREE_CODE (*expr_p))
2402     {
2403     case ADDR_EXPR:
2404       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2405            handled_component_p (*expr_p);
2406            expr_p = &TREE_OPERAND (*expr_p, 0))
2407         continue;
2408       obj = *expr_p;
2409       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2410         x = produce_memory_decl_rtl (obj, regno);
2411       break;
2412
2413     case SSA_NAME:
2414       *ws = 0;
2415       obj = SSA_NAME_VAR (*expr_p);
2416       if (!DECL_RTL_SET_P (obj))
2417         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2418       break;
2419
2420     case VAR_DECL:
2421     case PARM_DECL:
2422     case RESULT_DECL:
2423       *ws = 0;
2424       obj = *expr_p;
2425
2426       if (DECL_RTL_SET_P (obj))
2427         break;
2428
2429       if (DECL_MODE (obj) == BLKmode)
2430         x = produce_memory_decl_rtl (obj, regno);
2431       else
2432         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2433
2434       break;
2435
2436     default:
2437       break;
2438     }
2439
2440   if (x)
2441     {
2442       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2443       SET_DECL_RTL (obj, x);
2444     }
2445
2446   return NULL_TREE;
2447 }
2448
2449 /* Determines cost of the computation of EXPR.  */
2450
2451 static unsigned
2452 computation_cost (tree expr)
2453 {
2454   rtx seq, rslt;
2455   tree type = TREE_TYPE (expr);
2456   unsigned cost;
2457   /* Avoid using hard regs in ways which may be unsupported.  */
2458   int regno = LAST_VIRTUAL_REGISTER + 1;
2459
2460   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2461   start_sequence ();
2462   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2463   seq = get_insns ();
2464   end_sequence ();
2465
2466   cost = seq_cost (seq);
2467   if (MEM_P (rslt))
2468     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2469
2470   return cost;
2471 }
2472
2473 /* Returns variable containing the value of candidate CAND at statement AT.  */
2474
2475 static tree
2476 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2477 {
2478   if (stmt_after_increment (loop, cand, stmt))
2479     return cand->var_after;
2480   else
2481     return cand->var_before;
2482 }
2483
2484 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2485    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2486
2487 int
2488 tree_int_cst_sign_bit (tree t)
2489 {
2490   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2491   unsigned HOST_WIDE_INT w;
2492
2493   if (bitno < HOST_BITS_PER_WIDE_INT)
2494     w = TREE_INT_CST_LOW (t);
2495   else
2496     {
2497       w = TREE_INT_CST_HIGH (t);
2498       bitno -= HOST_BITS_PER_WIDE_INT;
2499     }
2500
2501   return (w >> bitno) & 1;
2502 }
2503
2504 /* If we can prove that TOP = cst * BOT for some constant cst,
2505    store cst to MUL and return true.  Otherwise return false.
2506    The returned value is always sign-extended, regardless of the
2507    signedness of TOP and BOT.  */
2508
2509 static bool
2510 constant_multiple_of (tree top, tree bot, double_int *mul)
2511 {
2512   tree mby;
2513   enum tree_code code;
2514   double_int res, p0, p1;
2515   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2516
2517   STRIP_NOPS (top);
2518   STRIP_NOPS (bot);
2519
2520   if (operand_equal_p (top, bot, 0))
2521     {
2522       *mul = double_int_one;
2523       return true;
2524     }
2525
2526   code = TREE_CODE (top);
2527   switch (code)
2528     {
2529     case MULT_EXPR:
2530       mby = TREE_OPERAND (top, 1);
2531       if (TREE_CODE (mby) != INTEGER_CST)
2532         return false;
2533
2534       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2535         return false;
2536
2537       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2538                               precision);
2539       return true;
2540
2541     case PLUS_EXPR:
2542     case MINUS_EXPR:
2543       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2544           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2545         return false;
2546
2547       if (code == MINUS_EXPR)
2548         p1 = double_int_neg (p1);
2549       *mul = double_int_sext (double_int_add (p0, p1), precision);
2550       return true;
2551
2552     case INTEGER_CST:
2553       if (TREE_CODE (bot) != INTEGER_CST)
2554         return false;
2555
2556       p0 = double_int_sext (tree_to_double_int (top), precision);
2557       p1 = double_int_sext (tree_to_double_int (bot), precision);
2558       if (double_int_zero_p (p1))
2559         return false;
2560       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2561                               precision);
2562       return double_int_zero_p (res);
2563
2564     default:
2565       return false;
2566     }
2567 }
2568
2569 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2570    same precision that is at least as wide as the precision of TYPE, stores
2571    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2572    type of A and B.  */
2573
2574 static tree
2575 determine_common_wider_type (tree *a, tree *b)
2576 {
2577   tree wider_type = NULL;
2578   tree suba, subb;
2579   tree atype = TREE_TYPE (*a);
2580
2581   if ((TREE_CODE (*a) == NOP_EXPR
2582        || TREE_CODE (*a) == CONVERT_EXPR))
2583     {
2584       suba = TREE_OPERAND (*a, 0);
2585       wider_type = TREE_TYPE (suba);
2586       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2587         return atype;
2588     }
2589   else
2590     return atype;
2591
2592   if ((TREE_CODE (*b) == NOP_EXPR
2593        || TREE_CODE (*b) == CONVERT_EXPR))
2594     {
2595       subb = TREE_OPERAND (*b, 0);
2596       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2597         return atype;
2598     }
2599   else
2600     return atype;
2601
2602   *a = suba;
2603   *b = subb;
2604   return wider_type;
2605 }
2606
2607 /* Determines the expression by that USE is expressed from induction variable
2608    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2609    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2610
2611 static bool
2612 get_computation_aff (struct loop *loop,
2613                      struct iv_use *use, struct iv_cand *cand, tree at,
2614                      struct affine_tree_combination *aff)
2615 {
2616   tree ubase = use->iv->base;
2617   tree ustep = use->iv->step;
2618   tree cbase = cand->iv->base;
2619   tree cstep = cand->iv->step, cstep_common;
2620   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2621   tree common_type, var;
2622   tree uutype;
2623   aff_tree cbase_aff, var_aff;
2624   double_int rat;
2625
2626   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2627     {
2628       /* We do not have a precision to express the values of use.  */
2629       return false;
2630     }
2631
2632   var = var_at_stmt (loop, cand, at);
2633   uutype = unsigned_type_for (utype);
2634
2635   /* If the conversion is not noop, perform it.  */
2636   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2637     {
2638       cstep = fold_convert (uutype, cstep);
2639       cbase = fold_convert (uutype, cbase);
2640       var = fold_convert (uutype, var);
2641     }
2642
2643   if (!constant_multiple_of (ustep, cstep, &rat))
2644     return false;
2645
2646   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2647      type, we achieve better folding by computing their difference in this
2648      wider type, and cast the result to UUTYPE.  We do not need to worry about
2649      overflows, as all the arithmetics will in the end be performed in UUTYPE
2650      anyway.  */
2651   common_type = determine_common_wider_type (&ubase, &cbase);
2652
2653   /* use = ubase - ratio * cbase + ratio * var.  */
2654   tree_to_aff_combination (ubase, common_type, aff);
2655   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2656   tree_to_aff_combination (var, uutype, &var_aff);
2657
2658   /* We need to shift the value if we are after the increment.  */
2659   if (stmt_after_increment (loop, cand, at))
2660     {
2661       aff_tree cstep_aff;
2662
2663       if (common_type != uutype)
2664         cstep_common = fold_convert (common_type, cstep);
2665       else
2666         cstep_common = cstep;
2667
2668       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2669       aff_combination_add (&cbase_aff, &cstep_aff);
2670     }
2671
2672   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2673   aff_combination_add (aff, &cbase_aff);
2674   if (common_type != uutype)
2675     aff_combination_convert (aff, uutype);
2676
2677   aff_combination_scale (&var_aff, rat);
2678   aff_combination_add (aff, &var_aff);
2679
2680   return true;
2681 }
2682
2683 /* Determines the expression by that USE is expressed from induction variable
2684    CAND at statement AT in LOOP.  The computation is unshared.  */
2685
2686 static tree
2687 get_computation_at (struct loop *loop,
2688                     struct iv_use *use, struct iv_cand *cand, tree at)
2689 {
2690   aff_tree aff;
2691   tree type = TREE_TYPE (use->iv->base);
2692
2693   if (!get_computation_aff (loop, use, cand, at, &aff))
2694     return NULL_TREE;
2695   unshare_aff_combination (&aff);
2696   return fold_convert (type, aff_combination_to_tree (&aff));
2697 }
2698
2699 /* Determines the expression by that USE is expressed from induction variable
2700    CAND in LOOP.  The computation is unshared.  */
2701
2702 static tree
2703 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2704 {
2705   return get_computation_at (loop, use, cand, use->stmt);
2706 }
2707
2708 /* Returns cost of addition in MODE.  */
2709
2710 static unsigned
2711 add_cost (enum machine_mode mode)
2712 {
2713   static unsigned costs[NUM_MACHINE_MODES];
2714   rtx seq;
2715   unsigned cost;
2716
2717   if (costs[mode])
2718     return costs[mode];
2719
2720   start_sequence ();
2721   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2722                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2723                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2724                  NULL_RTX);
2725   seq = get_insns ();
2726   end_sequence ();
2727
2728   cost = seq_cost (seq);
2729   if (!cost)
2730     cost = 1;
2731
2732   costs[mode] = cost;
2733
2734   if (dump_file && (dump_flags & TDF_DETAILS))
2735     fprintf (dump_file, "Addition in %s costs %d\n",
2736              GET_MODE_NAME (mode), cost);
2737   return cost;
2738 }
2739
2740 /* Entry in a hashtable of already known costs for multiplication.  */
2741 struct mbc_entry
2742 {
2743   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2744   enum machine_mode mode;       /* In mode.  */
2745   unsigned cost;                /* The cost.  */
2746 };
2747
2748 /* Counts hash value for the ENTRY.  */
2749
2750 static hashval_t
2751 mbc_entry_hash (const void *entry)
2752 {
2753   const struct mbc_entry *e = (const struct mbc_entry *) entry;
2754
2755   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2756 }
2757
2758 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2759
2760 static int
2761 mbc_entry_eq (const void *entry1, const void *entry2)
2762 {
2763   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2764   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2765
2766   return (e1->mode == e2->mode
2767           && e1->cst == e2->cst);
2768 }
2769
2770 /* Returns cost of multiplication by constant CST in MODE.  */
2771
2772 unsigned
2773 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2774 {
2775   static htab_t costs;
2776   struct mbc_entry **cached, act;
2777   rtx seq;
2778   unsigned cost;
2779
2780   if (!costs)
2781     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2782
2783   act.mode = mode;
2784   act.cst = cst;
2785   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2786   if (*cached)
2787     return (*cached)->cost;
2788
2789   *cached = XNEW (struct mbc_entry);
2790   (*cached)->mode = mode;
2791   (*cached)->cst = cst;
2792
2793   start_sequence ();
2794   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2795                gen_int_mode (cst, mode), NULL_RTX, 0);
2796   seq = get_insns ();
2797   end_sequence ();
2798
2799   cost = seq_cost (seq);
2800
2801   if (dump_file && (dump_flags & TDF_DETAILS))
2802     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2803              (int) cst, GET_MODE_NAME (mode), cost);
2804
2805   (*cached)->cost = cost;
2806
2807   return cost;
2808 }
2809
2810 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2811    validity for a memory reference accessing memory of mode MODE.  */
2812
2813 bool
2814 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2815 {
2816 #define MAX_RATIO 128
2817   static sbitmap valid_mult[MAX_MACHINE_MODE];
2818
2819   if (!valid_mult[mode])
2820     {
2821       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2822       rtx addr;
2823       HOST_WIDE_INT i;
2824
2825       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2826       sbitmap_zero (valid_mult[mode]);
2827       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2828       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2829         {
2830           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2831           if (memory_address_p (mode, addr))
2832             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2833         }
2834
2835       if (dump_file && (dump_flags & TDF_DETAILS))
2836         {
2837           fprintf (dump_file, "  allowed multipliers:");
2838           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2839             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2840               fprintf (dump_file, " %d", (int) i);
2841           fprintf (dump_file, "\n");
2842           fprintf (dump_file, "\n");
2843         }
2844     }
2845
2846   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2847     return false;
2848
2849   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2850 }
2851
2852 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2853    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2854    variable is omitted.  Compute the cost for a memory reference that accesses
2855    a memory location of mode MEM_MODE.
2856
2857    TODO -- there must be some better way.  This all is quite crude.  */
2858
2859 static unsigned
2860 get_address_cost (bool symbol_present, bool var_present,
2861                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2862                   enum machine_mode mem_mode)
2863 {
2864   static bool initialized[MAX_MACHINE_MODE];
2865   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2866   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2867   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2868   unsigned cost, acost;
2869   bool offset_p, ratio_p;
2870   HOST_WIDE_INT s_offset;
2871   unsigned HOST_WIDE_INT mask;
2872   unsigned bits;
2873
2874   if (!initialized[mem_mode])
2875     {
2876       HOST_WIDE_INT i;
2877       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2878       int old_cse_not_expected;
2879       unsigned sym_p, var_p, off_p, rat_p, add_c;
2880       rtx seq, addr, base;
2881       rtx reg0, reg1;
2882
2883       initialized[mem_mode] = true;
2884
2885       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2886
2887       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2888       for (i = start; i <= 1 << 20; i <<= 1)
2889         {
2890           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2891           if (!memory_address_p (mem_mode, addr))
2892             break;
2893         }
2894       max_offset[mem_mode] = i == start ? 0 : i >> 1;
2895       off[mem_mode] = max_offset[mem_mode];
2896
2897       for (i = start; i <= 1 << 20; i <<= 1)
2898         {
2899           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2900           if (!memory_address_p (mem_mode, addr))
2901             break;
2902         }
2903       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2904
2905       if (dump_file && (dump_flags & TDF_DETAILS))
2906         {
2907           fprintf (dump_file, "get_address_cost:\n");
2908           fprintf (dump_file, "  min offset %s %d\n",
2909                    GET_MODE_NAME (mem_mode),
2910                    (int) min_offset[mem_mode]);
2911           fprintf (dump_file, "  max offset %s %d\n",
2912                    GET_MODE_NAME (mem_mode),
2913                    (int) max_offset[mem_mode]);
2914         }
2915
2916       rat[mem_mode] = 1;
2917       for (i = 2; i <= MAX_RATIO; i++)
2918         if (multiplier_allowed_in_address_p (i, mem_mode))
2919           {
2920             rat[mem_mode] = i;
2921             break;
2922           }
2923
2924       /* Compute the cost of various addressing modes.  */
2925       acost = 0;
2926       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2927       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2928
2929       for (i = 0; i < 16; i++)
2930         {
2931           sym_p = i & 1;
2932           var_p = (i >> 1) & 1;
2933           off_p = (i >> 2) & 1;
2934           rat_p = (i >> 3) & 1;
2935
2936           addr = reg0;
2937           if (rat_p)
2938             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2939                                    gen_int_mode (rat[mem_mode], Pmode));
2940
2941           if (var_p)
2942             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2943
2944           if (sym_p)
2945             {
2946               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2947               /* ??? We can run into trouble with some backends by presenting
2948                  it with symbols which havn't been properly passed through
2949                  targetm.encode_section_info.  By setting the local bit, we
2950                  enhance the probability of things working.  */
2951               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
2952
2953               if (off_p)
2954                 base = gen_rtx_fmt_e (CONST, Pmode,
2955                                       gen_rtx_fmt_ee (PLUS, Pmode,
2956                                                       base,
2957                                                       gen_int_mode (off[mem_mode],
2958                                                                     Pmode)));
2959             }
2960           else if (off_p)
2961             base = gen_int_mode (off[mem_mode], Pmode);
2962           else
2963             base = NULL_RTX;
2964
2965           if (base)
2966             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2967
2968           start_sequence ();
2969           /* To avoid splitting addressing modes, pretend that no cse will
2970              follow.  */
2971           old_cse_not_expected = cse_not_expected;
2972           cse_not_expected = true;
2973           addr = memory_address (mem_mode, addr);
2974           cse_not_expected = old_cse_not_expected;
2975           seq = get_insns ();
2976           end_sequence ();
2977
2978           acost = seq_cost (seq);
2979           acost += address_cost (addr, mem_mode);
2980
2981           if (!acost)
2982             acost = 1;
2983           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
2984         }
2985
2986       /* On some targets, it is quite expensive to load symbol to a register,
2987          which makes addresses that contain symbols look much more expensive.
2988          However, the symbol will have to be loaded in any case before the
2989          loop (and quite likely we have it in register already), so it does not
2990          make much sense to penalize them too heavily.  So make some final
2991          tweaks for the SYMBOL_PRESENT modes:
2992
2993          If VAR_PRESENT is false, and the mode obtained by changing symbol to
2994          var is cheaper, use this mode with small penalty.
2995          If VAR_PRESENT is true, try whether the mode with
2996          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
2997          if this is the case, use it.  */
2998       add_c = add_cost (Pmode);
2999       for (i = 0; i < 8; i++)
3000         {
3001           var_p = i & 1;
3002           off_p = (i >> 1) & 1;
3003           rat_p = (i >> 2) & 1;
3004
3005           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3006           if (var_p)
3007             acost += add_c;
3008
3009           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3010             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3011         }
3012
3013       if (dump_file && (dump_flags & TDF_DETAILS))
3014         {
3015           fprintf (dump_file, "Address costs:\n");
3016
3017           for (i = 0; i < 16; i++)
3018             {
3019               sym_p = i & 1;
3020               var_p = (i >> 1) & 1;
3021               off_p = (i >> 2) & 1;
3022               rat_p = (i >> 3) & 1;
3023
3024               fprintf (dump_file, "  ");
3025               if (sym_p)
3026                 fprintf (dump_file, "sym + ");
3027               if (var_p)
3028                 fprintf (dump_file, "var + ");
3029               if (off_p)
3030                 fprintf (dump_file, "cst + ");
3031               if (rat_p)
3032                 fprintf (dump_file, "rat * ");
3033
3034               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3035               fprintf (dump_file, "index costs %d\n", acost);
3036             }
3037           fprintf (dump_file, "\n");
3038         }
3039     }
3040
3041   bits = GET_MODE_BITSIZE (Pmode);
3042   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3043   offset &= mask;
3044   if ((offset >> (bits - 1) & 1))
3045     offset |= ~mask;
3046   s_offset = offset;
3047
3048   cost = 0;
3049   offset_p = (s_offset != 0
3050               && min_offset[mem_mode] <= s_offset
3051               && s_offset <= max_offset[mem_mode]);
3052   ratio_p = (ratio != 1
3053              && multiplier_allowed_in_address_p (ratio, mem_mode));
3054
3055   if (ratio != 1 && !ratio_p)
3056     cost += multiply_by_cost (ratio, Pmode);
3057
3058   if (s_offset && !offset_p && !symbol_present)
3059     cost += add_cost (Pmode);
3060
3061   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3062   return cost + acost;
3063 }
3064
3065 /* Estimates cost of forcing expression EXPR into a variable.  */
3066
3067 unsigned
3068 force_expr_to_var_cost (tree expr)
3069 {
3070   static bool costs_initialized = false;
3071   static unsigned integer_cost;
3072   static unsigned symbol_cost;
3073   static unsigned address_cost;
3074   tree op0, op1;
3075   unsigned cost0, cost1, cost;
3076   enum machine_mode mode;
3077
3078   if (!costs_initialized)
3079     {
3080       tree type = build_pointer_type (integer_type_node);
3081       tree var, addr;
3082       rtx x;
3083
3084       var = create_tmp_var_raw (integer_type_node, "test_var");
3085       TREE_STATIC (var) = 1;
3086       x = produce_memory_decl_rtl (var, NULL);
3087       SET_DECL_RTL (var, x);
3088
3089       integer_cost = computation_cost (build_int_cst (integer_type_node,
3090                                                       2000));
3091
3092       addr = build1 (ADDR_EXPR, type, var);
3093       symbol_cost = computation_cost (addr) + 1;
3094
3095       address_cost
3096         = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3097                                     addr,
3098                                     build_int_cst (sizetype, 2000))) + 1;
3099       if (dump_file && (dump_flags & TDF_DETAILS))
3100         {
3101           fprintf (dump_file, "force_expr_to_var_cost:\n");
3102           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3103           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3104           fprintf (dump_file, "  address %d\n", (int) address_cost);
3105           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3106           fprintf (dump_file, "\n");
3107         }
3108
3109       costs_initialized = true;
3110     }
3111
3112   STRIP_NOPS (expr);
3113
3114   if (SSA_VAR_P (expr))
3115     return 0;
3116
3117   if (TREE_INVARIANT (expr))
3118     {
3119       if (TREE_CODE (expr) == INTEGER_CST)
3120         return integer_cost;
3121
3122       if (TREE_CODE (expr) == ADDR_EXPR)
3123         {
3124           tree obj = TREE_OPERAND (expr, 0);
3125
3126           if (TREE_CODE (obj) == VAR_DECL
3127               || TREE_CODE (obj) == PARM_DECL
3128               || TREE_CODE (obj) == RESULT_DECL)
3129             return symbol_cost;
3130         }
3131
3132       return address_cost;
3133     }
3134
3135   switch (TREE_CODE (expr))
3136     {
3137     case POINTER_PLUS_EXPR:
3138     case PLUS_EXPR:
3139     case MINUS_EXPR:
3140     case MULT_EXPR:
3141       op0 = TREE_OPERAND (expr, 0);
3142       op1 = TREE_OPERAND (expr, 1);
3143       STRIP_NOPS (op0);
3144       STRIP_NOPS (op1);
3145
3146       if (is_gimple_val (op0))
3147         cost0 = 0;
3148       else
3149         cost0 = force_expr_to_var_cost (op0);
3150
3151       if (is_gimple_val (op1))
3152         cost1 = 0;
3153       else
3154         cost1 = force_expr_to_var_cost (op1);
3155
3156       break;
3157
3158     default:
3159       /* Just an arbitrary value, FIXME.  */
3160       return target_spill_cost;
3161     }
3162
3163   mode = TYPE_MODE (TREE_TYPE (expr));
3164   switch (TREE_CODE (expr))
3165     {
3166     case POINTER_PLUS_EXPR:
3167     case PLUS_EXPR:
3168     case MINUS_EXPR:
3169       cost = add_cost (mode);
3170       break;
3171
3172     case MULT_EXPR:
3173       if (cst_and_fits_in_hwi (op0))
3174         cost = multiply_by_cost (int_cst_value (op0), mode);
3175       else if (cst_and_fits_in_hwi (op1))
3176         cost = multiply_by_cost (int_cst_value (op1), mode);
3177       else
3178         return target_spill_cost;
3179       break;
3180
3181     default:
3182       gcc_unreachable ();
3183     }
3184
3185   cost += cost0;
3186   cost += cost1;
3187
3188   /* Bound the cost by target_spill_cost.  The parts of complicated
3189      computations often are either loop invariant or at least can
3190      be shared between several iv uses, so letting this grow without
3191      limits would not give reasonable results.  */
3192   return cost < target_spill_cost ? cost : target_spill_cost;
3193 }
3194
3195 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3196    invariants the computation depends on.  */
3197
3198 static unsigned
3199 force_var_cost (struct ivopts_data *data,
3200                 tree expr, bitmap *depends_on)
3201 {
3202   if (depends_on)
3203     {
3204       fd_ivopts_data = data;
3205       walk_tree (&expr, find_depends, depends_on, NULL);
3206     }
3207
3208   return force_expr_to_var_cost (expr);
3209 }
3210
3211 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3212    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3213    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3214    invariants the computation depends on.  */
3215
3216 static unsigned
3217 split_address_cost (struct ivopts_data *data,
3218                     tree addr, bool *symbol_present, bool *var_present,
3219                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3220 {
3221   tree core;
3222   HOST_WIDE_INT bitsize;
3223   HOST_WIDE_INT bitpos;
3224   tree toffset;
3225   enum machine_mode mode;
3226   int unsignedp, volatilep;
3227
3228   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3229                               &unsignedp, &volatilep, false);
3230
3231   if (toffset != 0
3232       || bitpos % BITS_PER_UNIT != 0
3233       || TREE_CODE (core) != VAR_DECL)
3234     {
3235       *symbol_present = false;
3236       *var_present = true;
3237       fd_ivopts_data = data;
3238       walk_tree (&addr, find_depends, depends_on, NULL);
3239       return target_spill_cost;
3240     }
3241
3242   *offset += bitpos / BITS_PER_UNIT;
3243   if (TREE_STATIC (core)
3244       || DECL_EXTERNAL (core))
3245     {
3246       *symbol_present = true;
3247       *var_present = false;
3248       return 0;
3249     }
3250
3251   *symbol_present = false;
3252   *var_present = true;
3253   return 0;
3254 }
3255
3256 /* Estimates cost of expressing difference of addresses E1 - E2 as
3257    var + symbol + offset.  The value of offset is added to OFFSET,
3258    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3259    part is missing.  DEPENDS_ON is a set of the invariants the computation
3260    depends on.  */
3261
3262 static unsigned
3263 ptr_difference_cost (struct ivopts_data *data,
3264                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3265                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3266 {
3267   HOST_WIDE_INT diff = 0;
3268   unsigned cost;
3269
3270   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3271
3272   if (ptr_difference_const (e1, e2, &diff))
3273     {
3274       *offset += diff;
3275       *symbol_present = false;
3276       *var_present = false;
3277       return 0;
3278     }
3279
3280   if (e2 == integer_zero_node)
3281     return split_address_cost (data, TREE_OPERAND (e1, 0),
3282                                symbol_present, var_present, offset, depends_on);
3283
3284   *symbol_present = false;
3285   *var_present = true;
3286
3287   cost = force_var_cost (data, e1, depends_on);
3288   cost += force_var_cost (data, e2, depends_on);
3289   cost += add_cost (Pmode);
3290
3291   return cost;
3292 }
3293
3294 /* Estimates cost of expressing difference E1 - E2 as
3295    var + symbol + offset.  The value of offset is added to OFFSET,
3296    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3297    part is missing.  DEPENDS_ON is a set of the invariants the computation
3298    depends on.  */
3299
3300 static unsigned
3301 difference_cost (struct ivopts_data *data,
3302                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3303                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3304 {
3305   unsigned cost;
3306   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3307   unsigned HOST_WIDE_INT off1, off2;
3308
3309   e1 = strip_offset (e1, &off1);
3310   e2 = strip_offset (e2, &off2);
3311   *offset += off1 - off2;
3312
3313   STRIP_NOPS (e1);
3314   STRIP_NOPS (e2);
3315
3316   if (TREE_CODE (e1) == ADDR_EXPR)
3317     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3318                                 depends_on);
3319   *symbol_present = false;
3320
3321   if (operand_equal_p (e1, e2, 0))
3322     {
3323       *var_present = false;
3324       return 0;
3325     }
3326   *var_present = true;
3327   if (integer_zerop (e2))
3328     return force_var_cost (data, e1, depends_on);
3329
3330   if (integer_zerop (e1))
3331     {
3332       cost = force_var_cost (data, e2, depends_on);
3333       cost += multiply_by_cost (-1, mode);
3334
3335       return cost;
3336     }
3337
3338   cost = force_var_cost (data, e1, depends_on);
3339   cost += force_var_cost (data, e2, depends_on);
3340   cost += add_cost (mode);
3341
3342   return cost;
3343 }
3344
3345 /* Determines the cost of the computation by that USE is expressed
3346    from induction variable CAND.  If ADDRESS_P is true, we just need
3347    to create an address from it, otherwise we want to get it into
3348    register.  A set of invariants we depend on is stored in
3349    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3350
3351 static unsigned
3352 get_computation_cost_at (struct ivopts_data *data,
3353                          struct iv_use *use, struct iv_cand *cand,
3354                          bool address_p, bitmap *depends_on, tree at)
3355 {
3356   tree ubase = use->iv->base, ustep = use->iv->step;
3357   tree cbase, cstep;
3358   tree utype = TREE_TYPE (ubase), ctype;
3359   unsigned HOST_WIDE_INT cstepi, offset = 0;
3360   HOST_WIDE_INT ratio, aratio;
3361   bool var_present, symbol_present;
3362   unsigned cost = 0, n_sums;
3363   double_int rat;
3364
3365   *depends_on = NULL;
3366
3367   /* Only consider real candidates.  */
3368   if (!cand->iv)
3369     return INFTY;
3370
3371   cbase = cand->iv->base;
3372   cstep = cand->iv->step;
3373   ctype = TREE_TYPE (cbase);
3374
3375   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3376     {
3377       /* We do not have a precision to express the values of use.  */
3378       return INFTY;
3379     }
3380
3381   if (address_p)
3382     {
3383       /* Do not try to express address of an object with computation based
3384          on address of a different object.  This may cause problems in rtl
3385          level alias analysis (that does not expect this to be happening,
3386          as this is illegal in C), and would be unlikely to be useful
3387          anyway.  */
3388       if (use->iv->base_object
3389           && cand->iv->base_object
3390           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3391         return INFTY;
3392     }
3393
3394   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3395     {
3396       /* TODO -- add direct handling of this case.  */
3397       goto fallback;
3398     }
3399
3400   /* CSTEPI is removed from the offset in case statement is after the
3401      increment.  If the step is not constant, we use zero instead.
3402      This is a bit imprecise (there is the extra addition), but
3403      redundancy elimination is likely to transform the code so that
3404      it uses value of the variable before increment anyway,
3405      so it is not that much unrealistic.  */
3406   if (cst_and_fits_in_hwi (cstep))
3407     cstepi = int_cst_value (cstep);
3408   else
3409     cstepi = 0;
3410
3411   if (!constant_multiple_of (ustep, cstep, &rat))
3412     return INFTY;
3413
3414   if (double_int_fits_in_shwi_p (rat))
3415     ratio = double_int_to_shwi (rat);
3416   else
3417     return INFTY;
3418
3419   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3420      or ratio == 1, it is better to handle this like
3421
3422      ubase - ratio * cbase + ratio * var
3423
3424      (also holds in the case ratio == -1, TODO.  */
3425
3426   if (cst_and_fits_in_hwi (cbase))
3427     {
3428       offset = - ratio * int_cst_value (cbase);
3429       cost += difference_cost (data,
3430                                ubase, integer_zero_node,
3431                                &symbol_present, &var_present, &offset,
3432                                depends_on);
3433     }
3434   else if (ratio == 1)
3435     {
3436       cost += difference_cost (data,
3437                                ubase, cbase,
3438                                &symbol_present, &var_present, &offset,
3439                                depends_on);
3440     }
3441   else
3442     {
3443       cost += force_var_cost (data, cbase, depends_on);
3444       cost += add_cost (TYPE_MODE (ctype));
3445       cost += difference_cost (data,
3446                                ubase, integer_zero_node,
3447                                &symbol_present, &var_present, &offset,
3448                                depends_on);
3449     }
3450
3451   /* If we are after the increment, the value of the candidate is higher by
3452      one iteration.  */
3453   if (stmt_after_increment (data->current_loop, cand, at))
3454     offset -= ratio * cstepi;
3455
3456   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3457      (symbol/var/const parts may be omitted).  If we are looking for an address,
3458      find the cost of addressing this.  */
3459   if (address_p)
3460     return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3461                                     TYPE_MODE (TREE_TYPE (*use->op_p)));
3462
3463   /* Otherwise estimate the costs for computing the expression.  */
3464   aratio = ratio > 0 ? ratio : -ratio;
3465   if (!symbol_present && !var_present && !offset)
3466     {
3467       if (ratio != 1)
3468         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3469
3470       return cost;
3471     }
3472
3473   if (aratio != 1)
3474     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3475
3476   n_sums = 1;
3477   if (var_present
3478       /* Symbol + offset should be compile-time computable.  */
3479       && (symbol_present || offset))
3480     n_sums++;
3481
3482   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3483
3484 fallback:
3485   {
3486     /* Just get the expression, expand it and measure the cost.  */
3487     tree comp = get_computation_at (data->current_loop, use, cand, at);
3488
3489     if (!comp)
3490       return INFTY;
3491
3492     if (address_p)
3493       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3494
3495     return computation_cost (comp);
3496   }
3497 }
3498
3499 /* Determines the cost of the computation by that USE is expressed
3500    from induction variable CAND.  If ADDRESS_P is true, we just need
3501    to create an address from it, otherwise we want to get it into
3502    register.  A set of invariants we depend on is stored in
3503    DEPENDS_ON.  */
3504
3505 static unsigned
3506 get_computation_cost (struct ivopts_data *data,
3507                       struct iv_use *use, struct iv_cand *cand,
3508                       bool address_p, bitmap *depends_on)
3509 {
3510   return get_computation_cost_at (data,
3511                                   use, cand, address_p, depends_on, use->stmt);
3512 }
3513
3514 /* Determines cost of basing replacement of USE on CAND in a generic
3515    expression.  */
3516
3517 static bool
3518 determine_use_iv_cost_generic (struct ivopts_data *data,
3519                                struct iv_use *use, struct iv_cand *cand)
3520 {
3521   bitmap depends_on;
3522   unsigned cost;
3523
3524   /* The simple case first -- if we need to express value of the preserved
3525      original biv, the cost is 0.  This also prevents us from counting the
3526      cost of increment twice -- once at this use and once in the cost of
3527      the candidate.  */
3528   if (cand->pos == IP_ORIGINAL
3529       && cand->incremented_at == use->stmt)
3530     {
3531       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3532       return true;
3533     }
3534
3535   cost = get_computation_cost (data, use, cand, false, &depends_on);
3536   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3537
3538   return cost != INFTY;
3539 }
3540
3541 /* Determines cost of basing replacement of USE on CAND in an address.  */
3542
3543 static bool
3544 determine_use_iv_cost_address (struct ivopts_data *data,
3545                                struct iv_use *use, struct iv_cand *cand)
3546 {
3547   bitmap depends_on;
3548   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3549
3550   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3551
3552   return cost != INFTY;
3553 }
3554
3555 /* Computes value of candidate CAND at position AT in iteration NITER, and
3556    stores it to VAL.  */
3557
3558 static void
3559 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3560                aff_tree *val)
3561 {
3562   aff_tree step, delta, nit;
3563   struct iv *iv = cand->iv;
3564   tree type = TREE_TYPE (iv->base);
3565
3566   tree_to_aff_combination (iv->step, type, &step);
3567   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3568   aff_combination_convert (&nit, type);
3569   aff_combination_mult (&nit, &step, &delta);
3570   if (stmt_after_increment (loop, cand, at))
3571     aff_combination_add (&delta, &step);
3572
3573   tree_to_aff_combination (iv->base, type, val);
3574   aff_combination_add (val, &delta);
3575 }
3576
3577 /* Returns period of induction variable iv.  */
3578
3579 static tree
3580 iv_period (struct iv *iv)
3581 {
3582   tree step = iv->step, period, type;
3583   tree pow2div;
3584
3585   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3586
3587   /* Period of the iv is gcd (step, type range).  Since type range is power
3588      of two, it suffices to determine the maximum power of two that divides
3589      step.  */
3590   pow2div = num_ending_zeros (step);
3591   type = unsigned_type_for (TREE_TYPE (step));
3592
3593   period = build_low_bits_mask (type,
3594                                 (TYPE_PRECISION (type)
3595                                  - tree_low_cst (pow2div, 1)));
3596
3597   return period;
3598 }
3599
3600 /* Returns the comparison operator used when eliminating the iv USE.  */
3601
3602 static enum tree_code
3603 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3604 {
3605   struct loop *loop = data->current_loop;
3606   basic_block ex_bb;
3607   edge exit;
3608
3609   ex_bb = bb_for_stmt (use->stmt);
3610   exit = EDGE_SUCC (ex_bb, 0);
3611   if (flow_bb_inside_loop_p (loop, exit->dest))
3612     exit = EDGE_SUCC (ex_bb, 1);
3613
3614   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3615 }
3616
3617 /* Check whether it is possible to express the condition in USE by comparison
3618    of candidate CAND.  If so, store the value compared with to BOUND.  */
3619
3620 static bool
3621 may_eliminate_iv (struct ivopts_data *data,
3622                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3623 {
3624   basic_block ex_bb;
3625   edge exit;
3626   tree nit, nit_type;
3627   tree wider_type, period, per_type;
3628   struct loop *loop = data->current_loop;
3629   aff_tree bnd;
3630
3631   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3632     return false;
3633
3634   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3635      for other conditions inside loop body.  */
3636   ex_bb = bb_for_stmt (use->stmt);
3637   if (use->stmt != last_stmt (ex_bb)
3638       || TREE_CODE (use->stmt) != COND_EXPR)
3639     return false;
3640   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3641     return false;
3642
3643   exit = EDGE_SUCC (ex_bb, 0);
3644   if (flow_bb_inside_loop_p (loop, exit->dest))
3645     exit = EDGE_SUCC (ex_bb, 1);
3646   if (flow_bb_inside_loop_p (loop, exit->dest))
3647     return false;
3648
3649   nit = niter_for_exit (data, exit);
3650   if (!nit)
3651     return false;
3652
3653   nit_type = TREE_TYPE (nit);
3654
3655   /* Determine whether we may use the variable to test whether niter iterations
3656      elapsed.  This is the case iff the period of the induction variable is
3657      greater than the number of iterations.  */
3658   period = iv_period (cand->iv);
3659   if (!period)
3660     return false;
3661   per_type = TREE_TYPE (period);
3662
3663   wider_type = TREE_TYPE (period);
3664   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3665     wider_type = per_type;
3666   else
3667     wider_type = nit_type;
3668
3669   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3670                                       fold_convert (wider_type, period),
3671                                       fold_convert (wider_type, nit))))
3672     return false;
3673
3674   cand_value_at (loop, cand, use->stmt, nit, &bnd);
3675   *bound = aff_combination_to_tree (&bnd);
3676   return true;
3677 }
3678
3679 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3680
3681 static bool
3682 determine_use_iv_cost_condition (struct ivopts_data *data,
3683                                  struct iv_use *use, struct iv_cand *cand)
3684 {
3685   tree bound = NULL_TREE;
3686   struct iv *cmp_iv;
3687   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3688   unsigned elim_cost, express_cost, cost;
3689   bool ok;
3690
3691   /* Only consider real candidates.  */
3692   if (!cand->iv)
3693     {
3694       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3695       return false;
3696     }
3697
3698   /* Try iv elimination.  */
3699   if (may_eliminate_iv (data, use, cand, &bound))
3700     elim_cost = force_var_cost (data, bound, &depends_on_elim);
3701   else
3702     elim_cost = INFTY;
3703
3704   /* Try expressing the original giv.  If it is compared with an invariant,
3705      note that we cannot get rid of it.  */
3706   ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3707   gcc_assert (ok);
3708
3709   express_cost = get_computation_cost (data, use, cand, false,
3710                                        &depends_on_express);
3711   fd_ivopts_data = data;
3712   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3713
3714   /* Choose the better approach.  */
3715   if (elim_cost < express_cost)
3716     {
3717       cost = elim_cost;
3718       depends_on = depends_on_elim;
3719       depends_on_elim = NULL;
3720     }
3721   else
3722     {
3723       cost = express_cost;
3724       depends_on = depends_on_express;
3725       depends_on_express = NULL;
3726       bound = NULL_TREE;
3727     }
3728
3729   set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3730
3731   if (depends_on_elim)
3732     BITMAP_FREE (depends_on_elim);
3733   if (depends_on_express)
3734     BITMAP_FREE (depends_on_express);
3735
3736   return cost != INFTY;
3737 }
3738
3739 /* Determines cost of basing replacement of USE on CAND.  Returns false
3740    if USE cannot be based on CAND.  */
3741
3742 static bool
3743 determine_use_iv_cost (struct ivopts_data *data,
3744                        struct iv_use *use, struct iv_cand *cand)
3745 {
3746   switch (use->type)
3747     {
3748     case USE_NONLINEAR_EXPR:
3749       return determine_use_iv_cost_generic (data, use, cand);
3750
3751     case USE_ADDRESS:
3752       return determine_use_iv_cost_address (data, use, cand);
3753
3754     case USE_COMPARE:
3755       return determine_use_iv_cost_condition (data, use, cand);
3756
3757     default:
3758       gcc_unreachable ();
3759     }
3760 }
3761
3762 /* Determines costs of basing the use of the iv on an iv candidate.  */
3763
3764 static void
3765 determine_use_iv_costs (struct ivopts_data *data)
3766 {
3767   unsigned i, j;
3768   struct iv_use *use;
3769   struct iv_cand *cand;
3770   bitmap to_clear = BITMAP_ALLOC (NULL);
3771
3772   alloc_use_cost_map (data);
3773
3774   for (i = 0; i < n_iv_uses (data); i++)
3775     {
3776       use = iv_use (data, i);
3777
3778       if (data->consider_all_candidates)
3779         {
3780           for (j = 0; j < n_iv_cands (data); j++)
3781             {
3782               cand = iv_cand (data, j);
3783               determine_use_iv_cost (data, use, cand);
3784             }
3785         }
3786       else
3787         {
3788           bitmap_iterator bi;
3789
3790           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3791             {
3792               cand = iv_cand (data, j);
3793               if (!determine_use_iv_cost (data, use, cand))
3794                 bitmap_set_bit (to_clear, j);
3795             }
3796
3797           /* Remove the candidates for that the cost is infinite from
3798              the list of related candidates.  */
3799           bitmap_and_compl_into (use->related_cands, to_clear);
3800           bitmap_clear (to_clear);
3801         }
3802     }
3803
3804   BITMAP_FREE (to_clear);
3805
3806   if (dump_file && (dump_flags & TDF_DETAILS))
3807     {
3808       fprintf (dump_file, "Use-candidate costs:\n");
3809
3810       for (i = 0; i < n_iv_uses (data); i++)
3811         {
3812           use = iv_use (data, i);
3813
3814           fprintf (dump_file, "Use %d:\n", i);
3815           fprintf (dump_file, "  cand\tcost\tdepends on\n");
3816           for (j = 0; j < use->n_map_members; j++)
3817             {
3818               if (!use->cost_map[j].cand
3819                   || use->cost_map[j].cost == INFTY)
3820                 continue;
3821
3822               fprintf (dump_file, "  %d\t%d\t",
3823                        use->cost_map[j].cand->id,
3824                        use->cost_map[j].cost);
3825               if (use->cost_map[j].depends_on)
3826                 bitmap_print (dump_file,
3827                               use->cost_map[j].depends_on, "","");
3828               fprintf (dump_file, "\n");
3829             }
3830
3831           fprintf (dump_file, "\n");
3832         }
3833       fprintf (dump_file, "\n");
3834     }
3835 }
3836
3837 /* Determines cost of the candidate CAND.  */
3838
3839 static void
3840 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3841 {
3842   unsigned cost_base, cost_step;
3843   tree base;
3844
3845   if (!cand->iv)
3846     {
3847       cand->cost = 0;
3848       return;
3849     }
3850
3851   /* There are two costs associated with the candidate -- its increment
3852      and its initialization.  The second is almost negligible for any loop
3853      that rolls enough, so we take it just very little into account.  */
3854
3855   base = cand->iv->base;
3856   cost_base = force_var_cost (data, base, NULL);
3857   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3858
3859   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3860
3861   /* Prefer the original iv unless we may gain something by replacing it;
3862      this is not really relevant for artificial ivs created by other
3863      passes.  */
3864   if (cand->pos == IP_ORIGINAL
3865       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3866     cand->cost--;
3867
3868   /* Prefer not to insert statements into latch unless there are some
3869      already (so that we do not create unnecessary jumps).  */
3870   if (cand->pos == IP_END
3871       && empty_block_p (ip_end_pos (data->current_loop)))
3872     cand->cost++;
3873 }
3874
3875 /* Determines costs of computation of the candidates.  */
3876
3877 static void
3878 determine_iv_costs (struct ivopts_data *data)
3879 {
3880   unsigned i;
3881
3882   if (dump_file && (dump_flags & TDF_DETAILS))
3883     {
3884       fprintf (dump_file, "Candidate costs:\n");
3885       fprintf (dump_file, "  cand\tcost\n");
3886     }
3887
3888   for (i = 0; i < n_iv_cands (data); i++)
3889     {
3890       struct iv_cand *cand = iv_cand (data, i);
3891
3892       determine_iv_cost (data, cand);
3893
3894       if (dump_file && (dump_flags & TDF_DETAILS))
3895         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
3896     }
3897
3898   if (dump_file && (dump_flags & TDF_DETAILS))
3899     fprintf (dump_file, "\n");
3900 }
3901
3902 /* Calculates cost for having SIZE induction variables.  */
3903
3904 static unsigned
3905 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3906 {
3907   /* We add size to the cost, so that we prefer eliminating ivs
3908      if possible.  */
3909   return size + estimate_reg_pressure_cost (size, data->regs_used);
3910 }
3911
3912 /* For each size of the induction variable set determine the penalty.  */
3913
3914 static void
3915 determine_set_costs (struct ivopts_data *data)
3916 {
3917   unsigned j, n;
3918   tree phi, op;
3919   struct loop *loop = data->current_loop;
3920   bitmap_iterator bi;
3921
3922   /* We use the following model (definitely improvable, especially the
3923      cost function -- TODO):
3924
3925      We estimate the number of registers available (using MD data), name it A.
3926
3927      We estimate the number of registers used by the loop, name it U.  This
3928      number is obtained as the number of loop phi nodes (not counting virtual
3929      registers and bivs) + the number of variables from outside of the loop.
3930
3931      We set a reserve R (free regs that are used for temporary computations,
3932      etc.).  For now the reserve is a constant 3.
3933
3934      Let I be the number of induction variables.
3935
3936      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3937         make a lot of ivs without a reason).
3938      -- if A - R < U + I <= A, the cost is I * PRES_COST
3939      -- if U + I > A, the cost is I * PRES_COST and
3940         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
3941
3942   if (dump_file && (dump_flags & TDF_DETAILS))
3943     {
3944       fprintf (dump_file, "Global costs:\n");
3945       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
3946       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost);
3947       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
3948     }
3949
3950   n = 0;
3951   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3952     {
3953       op = PHI_RESULT (phi);
3954
3955       if (!is_gimple_reg (op))
3956         continue;
3957
3958       if (get_iv (data, op))
3959         continue;
3960
3961       n++;
3962     }
3963
3964   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3965     {
3966       struct version_info *info = ver_info (data, j);
3967
3968       if (info->inv_id && info->has_nonlin_use)
3969         n++;
3970     }
3971
3972   data->regs_used = n;
3973   if (dump_file && (dump_flags & TDF_DETAILS))
3974     fprintf (dump_file, "  regs_used %d\n", n);
3975
3976   if (dump_file && (dump_flags & TDF_DETAILS))
3977     {
3978       fprintf (dump_file, "  cost for size:\n");
3979       fprintf (dump_file, "  ivs\tcost\n");
3980       for (j = 0; j <= 2 * target_avail_regs; j++)
3981         fprintf (dump_file, "  %d\t%d\n", j,
3982                  ivopts_global_cost_for_size (data, j));
3983       fprintf (dump_file, "\n");
3984     }
3985 }
3986
3987 /* Returns true if A is a cheaper cost pair than B.  */
3988
3989 static bool
3990 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3991 {
3992   if (!a)
3993     return false;
3994
3995   if (!b)
3996     return true;
3997
3998   if (a->cost < b->cost)
3999     return true;
4000
4001   if (a->cost > b->cost)
4002     return false;
4003
4004   /* In case the costs are the same, prefer the cheaper candidate.  */
4005   if (a->cand->cost < b->cand->cost)
4006     return true;
4007
4008   return false;
4009 }
4010
4011 /* Computes the cost field of IVS structure.  */
4012
4013 static void
4014 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4015 {
4016   unsigned cost = 0;
4017
4018   cost += ivs->cand_use_cost;
4019   cost += ivs->cand_cost;
4020   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4021
4022   ivs->cost = cost;
4023 }
4024
4025 /* Remove invariants in set INVS to set IVS.  */
4026
4027 static void
4028 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4029 {
4030   bitmap_iterator bi;
4031   unsigned iid;
4032
4033   if (!invs)
4034     return;
4035
4036   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4037     {
4038       ivs->n_invariant_uses[iid]--;
4039       if (ivs->n_invariant_uses[iid] == 0)
4040         ivs->n_regs--;
4041     }
4042 }
4043
4044 /* Set USE not to be expressed by any candidate in IVS.  */
4045
4046 static void
4047 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4048                  struct iv_use *use)
4049 {
4050   unsigned uid = use->id, cid;
4051   struct cost_pair *cp;
4052
4053   cp = ivs->cand_for_use[uid];
4054   if (!cp)
4055     return;
4056   cid = cp->cand->id;
4057
4058   ivs->bad_uses++;
4059   ivs->cand_for_use[uid] = NULL;
4060   ivs->n_cand_uses[cid]--;
4061
4062   if (ivs->n_cand_uses[cid] == 0)
4063     {
4064       bitmap_clear_bit (ivs->cands, cid);
4065       /* Do not count the pseudocandidates.  */
4066       if (cp->cand->iv)
4067         ivs->n_regs--;
4068       ivs->n_cands--;
4069       ivs->cand_cost -= cp->cand->cost;
4070
4071       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4072     }
4073
4074   ivs->cand_use_cost -= cp->cost;
4075
4076   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4077   iv_ca_recount_cost (data, ivs);
4078 }
4079
4080 /* Add invariants in set INVS to set IVS.  */
4081
4082 static void
4083 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4084 {
4085   bitmap_iterator bi;
4086   unsigned iid;
4087
4088   if (!invs)
4089     return;
4090
4091   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4092     {
4093       ivs->n_invariant_uses[iid]++;
4094       if (ivs->n_invariant_uses[iid] == 1)
4095         ivs->n_regs++;
4096     }
4097 }
4098
4099 /* Set cost pair for USE in set IVS to CP.  */
4100
4101 static void
4102 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4103               struct iv_use *use, struct cost_pair *cp)
4104 {
4105   unsigned uid = use->id, cid;
4106
4107   if (ivs->cand_for_use[uid] == cp)
4108     return;
4109
4110   if (ivs->cand_for_use[uid])
4111     iv_ca_set_no_cp (data, ivs, use);
4112
4113   if (cp)
4114     {
4115       cid = cp->cand->id;
4116
4117       ivs->bad_uses--;
4118       ivs->cand_for_use[uid] = cp;
4119       ivs->n_cand_uses[cid]++;
4120       if (ivs->n_cand_uses[cid] == 1)
4121         {
4122           bitmap_set_bit (ivs->cands, cid);
4123           /* Do not count the pseudocandidates.  */
4124           if (cp->cand->iv)
4125             ivs->n_regs++;
4126           ivs->n_cands++;
4127           ivs->cand_cost += cp->cand->cost;
4128
4129           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4130         }
4131
4132       ivs->cand_use_cost += cp->cost;
4133       iv_ca_set_add_invariants (ivs, cp->depends_on);
4134       iv_ca_recount_cost (data, ivs);
4135     }
4136 }
4137
4138 /* Extend set IVS by expressing USE by some of the candidates in it
4139    if possible.  */
4140
4141 static void
4142 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4143                struct iv_use *use)
4144 {
4145   struct cost_pair *best_cp = NULL, *cp;
4146   bitmap_iterator bi;
4147   unsigned i;
4148
4149   gcc_assert (ivs->upto >= use->id);
4150
4151   if (ivs->upto == use->id)
4152     {
4153       ivs->upto++;
4154       ivs->bad_uses++;
4155     }
4156
4157   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4158     {
4159       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4160
4161       if (cheaper_cost_pair (cp, best_cp))
4162         best_cp = cp;
4163     }
4164
4165   iv_ca_set_cp (data, ivs, use, best_cp);
4166 }
4167
4168 /* Get cost for assignment IVS.  */
4169
4170 static unsigned
4171 iv_ca_cost (struct iv_ca *ivs)
4172 {
4173   return (ivs->bad_uses ? INFTY : ivs->cost);
4174 }
4175
4176 /* Returns true if all dependences of CP are among invariants in IVS.  */
4177
4178 static bool
4179 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4180 {
4181   unsigned i;
4182   bitmap_iterator bi;
4183
4184   if (!cp->depends_on)
4185     return true;
4186
4187   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4188     {
4189       if (ivs->n_invariant_uses[i] == 0)
4190         return false;
4191     }
4192
4193   return true;
4194 }
4195
4196 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4197    it before NEXT_CHANGE.  */
4198
4199 static struct iv_ca_delta *
4200 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4201                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4202 {
4203   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4204
4205   change->use = use;
4206   change->old_cp = old_cp;
4207   change->new_cp = new_cp;
4208   change->next_change = next_change;
4209
4210   return change;
4211 }
4212
4213 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4214    are rewritten.  */
4215
4216 static struct iv_ca_delta *
4217 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4218 {
4219   struct iv_ca_delta *last;
4220
4221   if (!l2)
4222     return l1;
4223
4224   if (!l1)
4225     return l2;
4226
4227   for (last = l1; last->next_change; last = last->next_change)
4228     continue;
4229   last->next_change = l2;
4230
4231   return l1;
4232 }
4233
4234 /* Returns candidate by that USE is expressed in IVS.  */
4235
4236 static struct cost_pair *
4237 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4238 {
4239   return ivs->cand_for_use[use->id];
4240 }
4241
4242 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4243
4244 static struct iv_ca_delta *
4245 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4246 {
4247   struct iv_ca_delta *act, *next, *prev = NULL;
4248   struct cost_pair *tmp;
4249
4250   for (act = delta; act; act = next)
4251     {
4252       next = act->next_change;
4253       act->next_change = prev;
4254       prev = act;
4255
4256       tmp = act->old_cp;
4257       act->old_cp = act->new_cp;
4258       act->new_cp = tmp;
4259     }
4260
4261   return prev;
4262 }
4263
4264 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4265    reverted instead.  */
4266
4267 static void
4268 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4269                     struct iv_ca_delta *delta, bool forward)
4270 {
4271   struct cost_pair *from, *to;
4272   struct iv_ca_delta *act;
4273
4274   if (!forward)
4275     delta = iv_ca_delta_reverse (delta);
4276
4277   for (act = delta; act; act = act->next_change)
4278     {
4279       from = act->old_cp;
4280       to = act->new_cp;
4281       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4282       iv_ca_set_cp (data, ivs, act->use, to);
4283     }
4284
4285   if (!forward)
4286     iv_ca_delta_reverse (delta);
4287 }
4288
4289 /* Returns true if CAND is used in IVS.  */
4290
4291 static bool
4292 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4293 {
4294   return ivs->n_cand_uses[cand->id] > 0;
4295 }
4296
4297 /* Returns number of induction variable candidates in the set IVS.  */
4298
4299 static unsigned
4300 iv_ca_n_cands (struct iv_ca *ivs)
4301 {
4302   return ivs->n_cands;
4303 }
4304
4305 /* Free the list of changes DELTA.  */
4306
4307 static void
4308 iv_ca_delta_free (struct iv_ca_delta **delta)
4309 {
4310   struct iv_ca_delta *act, *next;
4311
4312   for (act = *delta; act; act = next)
4313     {
4314       next = act->next_change;
4315       free (act);
4316     }
4317
4318   *delta = NULL;
4319 }
4320
4321 /* Allocates new iv candidates assignment.  */
4322
4323 static struct iv_ca *
4324 iv_ca_new (struct ivopts_data *data)
4325 {
4326   struct iv_ca *nw = XNEW (struct iv_ca);
4327
4328   nw->upto = 0;
4329   nw->bad_uses = 0;
4330   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4331   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4332   nw->cands = BITMAP_ALLOC (NULL);
4333   nw->n_cands = 0;
4334   nw->n_regs = 0;
4335   nw->cand_use_cost = 0;
4336   nw->cand_cost = 0;
4337   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4338   nw->cost = 0;
4339
4340   return nw;
4341 }
4342
4343 /* Free memory occupied by the set IVS.  */
4344
4345 static void
4346 iv_ca_free (struct iv_ca **ivs)
4347 {
4348   free ((*ivs)->cand_for_use);
4349   free ((*ivs)->n_cand_uses);
4350   BITMAP_FREE ((*ivs)->cands);
4351   free ((*ivs)->n_invariant_uses);
4352   free (*ivs);
4353   *ivs = NULL;
4354 }
4355
4356 /* Dumps IVS to FILE.  */
4357
4358 static void
4359 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4360 {
4361   const char *pref = "  invariants ";
4362   unsigned i;
4363
4364   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4365   bitmap_print (file, ivs->cands, "  candidates ","\n");
4366
4367   for (i = 1; i <= data->max_inv_id; i++)
4368     if (ivs->n_invariant_uses[i])
4369       {
4370         fprintf (file, "%s%d", pref, i);
4371         pref = ", ";
4372       }
4373   fprintf (file, "\n");
4374 }
4375
4376 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4377    new set, and store differences in DELTA.  Number of induction variables
4378    in the new set is stored to N_IVS.  */
4379
4380 static unsigned
4381 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4382               struct iv_cand *cand, struct iv_ca_delta **delta,
4383               unsigned *n_ivs)
4384 {
4385   unsigned i, cost;
4386   struct iv_use *use;
4387   struct cost_pair *old_cp, *new_cp;
4388
4389   *delta = NULL;
4390   for (i = 0; i < ivs->upto; i++)
4391     {
4392       use = iv_use (data, i);
4393       old_cp = iv_ca_cand_for_use (ivs, use);
4394
4395       if (old_cp
4396           && old_cp->cand == cand)
4397         continue;
4398
4399       new_cp = get_use_iv_cost (data, use, cand);
4400       if (!new_cp)
4401         continue;
4402
4403       if (!iv_ca_has_deps (ivs, new_cp))
4404         continue;
4405
4406       if (!cheaper_cost_pair (new_cp, old_cp))
4407         continue;
4408
4409       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4410     }
4411
4412   iv_ca_delta_commit (data, ivs, *delta, true);
4413   cost = iv_ca_cost (ivs);
4414   if (n_ivs)
4415     *n_ivs = iv_ca_n_cands (ivs);
4416   iv_ca_delta_commit (data, ivs, *delta, false);
4417
4418   return cost;
4419 }
4420
4421 /* Try narrowing set IVS by removing CAND.  Return the cost of
4422    the new set and store the differences in DELTA.  */
4423
4424 static unsigned
4425 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4426               struct iv_cand *cand, struct iv_ca_delta **delta)
4427 {
4428   unsigned i, ci;
4429   struct iv_use *use;
4430   struct cost_pair *old_cp, *new_cp, *cp;
4431   bitmap_iterator bi;
4432   struct iv_cand *cnd;
4433   unsigned cost;
4434
4435   *delta = NULL;
4436   for (i = 0; i < n_iv_uses (data); i++)
4437     {
4438       use = iv_use (data, i);
4439
4440       old_cp = iv_ca_cand_for_use (ivs, use);
4441       if (old_cp->cand != cand)
4442         continue;
4443
4444       new_cp = NULL;
4445
4446       if (data->consider_all_candidates)
4447         {
4448           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4449             {
4450               if (ci == cand->id)
4451                 continue;
4452
4453               cnd = iv_cand (data, ci);
4454
4455               cp = get_use_iv_cost (data, use, cnd);
4456               if (!cp)
4457                 continue;
4458               if (!iv_ca_has_deps (ivs, cp))
4459                 continue;
4460
4461               if (!cheaper_cost_pair (cp, new_cp))
4462                 continue;
4463
4464               new_cp = cp;
4465             }
4466         }
4467       else
4468         {
4469           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4470             {
4471               if (ci == cand->id)
4472                 continue;
4473
4474               cnd = iv_cand (data, ci);
4475
4476               cp = get_use_iv_cost (data, use, cnd);
4477               if (!cp)
4478                 continue;
4479               if (!iv_ca_has_deps (ivs, cp))
4480                 continue;
4481
4482               if (!cheaper_cost_pair (cp, new_cp))
4483                 continue;
4484
4485               new_cp = cp;
4486             }
4487         }
4488
4489       if (!new_cp)
4490         {
4491           iv_ca_delta_free (delta);
4492           return INFTY;
4493         }
4494
4495       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4496     }
4497
4498   iv_ca_delta_commit (data, ivs, *delta, true);
4499   cost = iv_ca_cost (ivs);
4500   iv_ca_delta_commit (data, ivs, *delta, false);
4501
4502   return cost;
4503 }
4504
4505 /* Try optimizing the set of candidates IVS by removing candidates different
4506    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4507    differences in DELTA.  */
4508
4509 static unsigned
4510 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4511              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4512 {
4513   bitmap_iterator bi;
4514   struct iv_ca_delta *act_delta, *best_delta;
4515   unsigned i, best_cost, acost;
4516   struct iv_cand *cand;
4517
4518   best_delta = NULL;
4519   best_cost = iv_ca_cost (ivs);
4520
4521   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4522     {
4523       cand = iv_cand (data, i);
4524
4525       if (cand == except_cand)
4526         continue;
4527
4528       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4529
4530       if (acost < best_cost)
4531         {
4532           best_cost = acost;
4533           iv_ca_delta_free (&best_delta);
4534           best_delta = act_delta;
4535         }
4536       else
4537         iv_ca_delta_free (&act_delta);
4538     }
4539
4540   if (!best_delta)
4541     {
4542       *delta = NULL;
4543       return best_cost;
4544     }
4545
4546   /* Recurse to possibly remove other unnecessary ivs.  */
4547   iv_ca_delta_commit (data, ivs, best_delta, true);
4548   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4549   iv_ca_delta_commit (data, ivs, best_delta, false);
4550   *delta = iv_ca_delta_join (best_delta, *delta);
4551   return best_cost;
4552 }
4553
4554 /* Tries to extend the sets IVS in the best possible way in order
4555    to express the USE.  */
4556
4557 static bool
4558 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4559                   struct iv_use *use)
4560 {
4561   unsigned best_cost, act_cost;
4562   unsigned i;
4563   bitmap_iterator bi;
4564   struct iv_cand *cand;
4565   struct iv_ca_delta *best_delta = NULL, *act_delta;
4566   struct cost_pair *cp;
4567
4568   iv_ca_add_use (data, ivs, use);
4569   best_cost = iv_ca_cost (ivs);
4570
4571   cp = iv_ca_cand_for_use (ivs, use);
4572   if (cp)
4573     {
4574       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4575       iv_ca_set_no_cp (data, ivs, use);
4576     }
4577
4578   /* First try important candidates.  Only if it fails, try the specific ones.
4579      Rationale -- in loops with many variables the best choice often is to use
4580      just one generic biv.  If we added here many ivs specific to the uses,
4581      the optimization algorithm later would be likely to get stuck in a local
4582      minimum, thus causing us to create too many ivs.  The approach from
4583      few ivs to more seems more likely to be successful -- starting from few
4584      ivs, replacing an expensive use by a specific iv should always be a
4585      win.  */
4586   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4587     {
4588       cand = iv_cand (data, i);
4589
4590       if (iv_ca_cand_used_p (ivs, cand))
4591         continue;
4592
4593       cp = get_use_iv_cost (data, use, cand);
4594       if (!cp)
4595         continue;
4596
4597       iv_ca_set_cp (data, ivs, use, cp);
4598       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4599       iv_ca_set_no_cp (data, ivs, use);
4600       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4601
4602       if (act_cost < best_cost)
4603         {
4604           best_cost = act_cost;
4605
4606           iv_ca_delta_free (&best_delta);
4607           best_delta = act_delta;
4608         }
4609       else
4610         iv_ca_delta_free (&act_delta);
4611     }
4612
4613   if (best_cost == INFTY)
4614     {
4615       for (i = 0; i < use->n_map_members; i++)
4616         {
4617           cp = use->cost_map + i;
4618           cand = cp->cand;
4619           if (!cand)
4620             continue;
4621
4622           /* Already tried this.  */
4623           if (cand->important)
4624             continue;
4625
4626           if (iv_ca_cand_used_p (ivs, cand))
4627             continue;
4628
4629           act_delta = NULL;
4630           iv_ca_set_cp (data, ivs, use, cp);
4631           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4632           iv_ca_set_no_cp (data, ivs, use);
4633           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4634                                        cp, act_delta);
4635
4636           if (act_cost < best_cost)
4637             {
4638               best_cost = act_cost;
4639
4640               if (best_delta)
4641                 iv_ca_delta_free (&best_delta);
4642               best_delta = act_delta;
4643             }
4644           else
4645             iv_ca_delta_free (&act_delta);
4646         }
4647     }
4648
4649   iv_ca_delta_commit (data, ivs, best_delta, true);
4650   iv_ca_delta_free (&best_delta);
4651
4652   return (best_cost != INFTY);
4653 }
4654
4655 /* Finds an initial assignment of candidates to uses.  */
4656
4657 static struct iv_ca *
4658 get_initial_solution (struct ivopts_data *data)
4659 {
4660   struct iv_ca *ivs = iv_ca_new (data);
4661   unsigned i;
4662
4663   for (i = 0; i < n_iv_uses (data); i++)
4664     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4665       {
4666         iv_ca_free (&ivs);
4667         return NULL;
4668       }
4669
4670   return ivs;
4671 }
4672
4673 /* Tries to improve set of induction variables IVS.  */
4674
4675 static bool
4676 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4677 {
4678   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4679   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4680   struct iv_cand *cand;
4681
4682   /* Try extending the set of induction variables by one.  */
4683   for (i = 0; i < n_iv_cands (data); i++)
4684     {
4685       cand = iv_cand (data, i);
4686
4687       if (iv_ca_cand_used_p (ivs, cand))
4688         continue;
4689
4690       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4691       if (!act_delta)
4692         continue;
4693
4694       /* If we successfully added the candidate and the set is small enough,
4695          try optimizing it by removing other candidates.  */
4696       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4697         {
4698           iv_ca_delta_commit (data, ivs, act_delta, true);
4699           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4700           iv_ca_delta_commit (data, ivs, act_delta, false);
4701           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4702         }
4703
4704       if (acost < best_cost)
4705         {
4706           best_cost = acost;
4707           iv_ca_delta_free (&best_delta);
4708           best_delta = act_delta;
4709         }
4710       else
4711         iv_ca_delta_free (&act_delta);
4712     }
4713
4714   if (!best_delta)
4715     {
4716       /* Try removing the candidates from the set instead.  */
4717       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4718
4719       /* Nothing more we can do.  */
4720       if (!best_delta)
4721         return false;
4722     }
4723
4724   iv_ca_delta_commit (data, ivs, best_delta, true);
4725   gcc_assert (best_cost == iv_ca_cost (ivs));
4726   iv_ca_delta_free (&best_delta);
4727   return true;
4728 }
4729
4730 /* Attempts to find the optimal set of induction variables.  We do simple
4731    greedy heuristic -- we try to replace at most one candidate in the selected
4732    solution and remove the unused ivs while this improves the cost.  */
4733
4734 static struct iv_ca *
4735 find_optimal_iv_set (struct ivopts_data *data)
4736 {
4737   unsigned i;
4738   struct iv_ca *set;
4739   struct iv_use *use;
4740
4741   /* Get the initial solution.  */
4742   set = get_initial_solution (data);
4743   if (!set)
4744     {
4745       if (dump_file && (dump_flags & TDF_DETAILS))
4746         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4747       return NULL;
4748     }
4749
4750   if (dump_file && (dump_flags & TDF_DETAILS))
4751     {
4752       fprintf (dump_file, "Initial set of candidates:\n");
4753       iv_ca_dump (data, dump_file, set);
4754     }
4755
4756   while (try_improve_iv_set (data, set))
4757     {
4758       if (dump_file && (dump_flags & TDF_DETAILS))
4759         {
4760           fprintf (dump_file, "Improved to:\n");
4761           iv_ca_dump (data, dump_file, set);
4762         }
4763     }
4764
4765   if (dump_file && (dump_flags & TDF_DETAILS))
4766     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4767
4768   for (i = 0; i < n_iv_uses (data); i++)
4769     {
4770       use = iv_use (data, i);
4771       use->selected = iv_ca_cand_for_use (set, use)->cand;
4772     }
4773
4774   return set;
4775 }
4776
4777 /* Creates a new induction variable corresponding to CAND.  */
4778
4779 static void
4780 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4781 {
4782   block_stmt_iterator incr_pos;
4783   tree base;
4784   bool after = false;
4785
4786   if (!cand->iv)
4787     return;
4788
4789   switch (cand->pos)
4790     {
4791     case IP_NORMAL:
4792       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4793       break;
4794
4795     case IP_END:
4796       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4797       after = true;
4798       break;
4799
4800     case IP_ORIGINAL:
4801       /* Mark that the iv is preserved.  */
4802       name_info (data, cand->var_before)->preserve_biv = true;
4803       name_info (data, cand->var_after)->preserve_biv = true;
4804
4805       /* Rewrite the increment so that it uses var_before directly.  */
4806       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4807
4808       return;
4809     }
4810
4811   gimple_add_tmp_var (cand->var_before);
4812   add_referenced_var (cand->var_before);
4813
4814   base = unshare_expr (cand->iv->base);
4815
4816   create_iv (base, unshare_expr (cand->iv->step),
4817              cand->var_before, data->current_loop,
4818              &incr_pos, after, &cand->var_before, &cand->var_after);
4819 }
4820
4821 /* Creates new induction variables described in SET.  */
4822
4823 static void
4824 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4825 {
4826   unsigned i;
4827   struct iv_cand *cand;
4828   bitmap_iterator bi;
4829
4830   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4831     {
4832       cand = iv_cand (data, i);
4833       create_new_iv (data, cand);
4834     }
4835 }
4836
4837 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4838    is true, remove also the ssa name defined by the statement.  */
4839
4840 static void
4841 remove_statement (tree stmt, bool including_defined_name)
4842 {
4843   if (TREE_CODE (stmt) == PHI_NODE)
4844     {
4845       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4846     }
4847   else
4848     {
4849       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4850
4851       bsi_remove (&bsi, true);
4852       release_defs (stmt);
4853     }
4854 }
4855
4856 /* Rewrites USE (definition of iv used in a nonlinear expression)
4857    using candidate CAND.  */
4858
4859 static void
4860 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4861                             struct iv_use *use, struct iv_cand *cand)
4862 {
4863   tree comp;
4864   tree op, stmts, tgt, ass;
4865   block_stmt_iterator bsi;
4866
4867   /* An important special case -- if we are asked to express value of
4868      the original iv by itself, just exit; there is no need to
4869      introduce a new computation (that might also need casting the
4870      variable to unsigned and back).  */
4871   if (cand->pos == IP_ORIGINAL
4872       && cand->incremented_at == use->stmt)
4873     {
4874       tree step, ctype, utype;
4875       enum tree_code incr_code = PLUS_EXPR;
4876
4877       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4878       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4879
4880       step = cand->iv->step;
4881       ctype = TREE_TYPE (step);
4882       utype = TREE_TYPE (cand->var_after);
4883       if (TREE_CODE (step) == NEGATE_EXPR)
4884         {
4885           incr_code = MINUS_EXPR;
4886           step = TREE_OPERAND (step, 0);
4887         }
4888
4889       /* Check whether we may leave the computation unchanged.
4890          This is the case only if it does not rely on other
4891          computations in the loop -- otherwise, the computation
4892          we rely upon may be removed in remove_unused_ivs,
4893          thus leading to ICE.  */
4894       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
4895       if (TREE_CODE (op) == PLUS_EXPR
4896           || TREE_CODE (op) == MINUS_EXPR)
4897         {
4898           if (TREE_OPERAND (op, 0) == cand->var_before)
4899             op = TREE_OPERAND (op, 1);
4900           else if (TREE_CODE (op) == PLUS_EXPR
4901                    && TREE_OPERAND (op, 1) == cand->var_before)
4902             op = TREE_OPERAND (op, 0);
4903           else
4904             op = NULL_TREE;
4905         }
4906       else
4907         op = NULL_TREE;
4908
4909       if (op
4910           && (TREE_CODE (op) == INTEGER_CST
4911               || operand_equal_p (op, step, 0)))
4912         return;
4913
4914       /* Otherwise, add the necessary computations to express
4915          the iv.  */
4916       op = fold_convert (ctype, cand->var_before);
4917       comp = fold_convert (utype,
4918                            build2 (incr_code, ctype, op,
4919                                    unshare_expr (step)));
4920     }
4921   else
4922     {
4923       comp = get_computation (data->current_loop, use, cand);
4924       gcc_assert (comp != NULL_TREE);
4925     }
4926
4927   switch (TREE_CODE (use->stmt))
4928     {
4929     case PHI_NODE:
4930       tgt = PHI_RESULT (use->stmt);
4931
4932       /* If we should keep the biv, do not replace it.  */
4933       if (name_info (data, tgt)->preserve_biv)
4934         return;
4935
4936       bsi = bsi_after_labels (bb_for_stmt (use->stmt));
4937       break;
4938
4939     case GIMPLE_MODIFY_STMT:
4940       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
4941       bsi = bsi_for_stmt (use->stmt);
4942       break;
4943
4944     default:
4945       gcc_unreachable ();
4946     }
4947
4948   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4949   if (stmts)
4950     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4951
4952   if (TREE_CODE (use->stmt) == PHI_NODE)
4953     {
4954       ass = build_gimple_modify_stmt (tgt, op);
4955       bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
4956       remove_statement (use->stmt, false);
4957       SSA_NAME_DEF_STMT (tgt) = ass;
4958     }
4959   else
4960     GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
4961 }
4962
4963 /* Replaces ssa name in index IDX by its basic variable.  Callback for
4964    for_each_index.  */
4965
4966 static bool
4967 idx_remove_ssa_names (tree base, tree *idx,
4968                       void *data ATTRIBUTE_UNUSED)
4969 {
4970   tree *op;
4971
4972   if (TREE_CODE (*idx) == SSA_NAME)
4973     *idx = SSA_NAME_VAR (*idx);
4974
4975   if (TREE_CODE (base) == ARRAY_REF)
4976     {
4977       op = &TREE_OPERAND (base, 2);
4978       if (*op
4979           && TREE_CODE (*op) == SSA_NAME)
4980         *op = SSA_NAME_VAR (*op);
4981       op = &TREE_OPERAND (base, 3);
4982       if (*op
4983           && TREE_CODE (*op) == SSA_NAME)
4984         *op = SSA_NAME_VAR (*op);
4985     }
4986
4987   return true;
4988 }
4989
4990 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
4991
4992 static tree
4993 unshare_and_remove_ssa_names (tree ref)
4994 {
4995   ref = unshare_expr (ref);
4996   for_each_index (&ref, idx_remove_ssa_names, NULL);
4997
4998   return ref;
4999 }
5000
5001 /* Extract the alias analysis info for the memory reference REF.  There are
5002    several ways how this information may be stored and what precisely is
5003    its semantics depending on the type of the reference, but there always is
5004    somewhere hidden one _DECL node that is used to determine the set of
5005    virtual operands for the reference.  The code below deciphers this jungle
5006    and extracts this single useful piece of information.  */
5007
5008 static tree
5009 get_ref_tag (tree ref, tree orig)
5010 {
5011   tree var = get_base_address (ref);
5012   tree aref = NULL_TREE, tag, sv;
5013   HOST_WIDE_INT offset, size, maxsize;
5014
5015   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5016     {
5017       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5018       if (ref)
5019         break;
5020     }
5021
5022   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5023     return unshare_expr (sv);
5024
5025   if (!var)
5026     return NULL_TREE;
5027
5028   if (TREE_CODE (var) == INDIRECT_REF)
5029     {
5030       /* If the base is a dereference of a pointer, first check its name memory
5031          tag.  If it does not have one, use its symbol memory tag.  */
5032       var = TREE_OPERAND (var, 0);
5033       if (TREE_CODE (var) != SSA_NAME)
5034         return NULL_TREE;
5035
5036       if (SSA_NAME_PTR_INFO (var))
5037         {
5038           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5039           if (tag)
5040             return tag;
5041         }
5042
5043       var = SSA_NAME_VAR (var);
5044       tag = symbol_mem_tag (var);
5045       gcc_assert (tag != NULL_TREE);
5046       return tag;
5047     }
5048   else
5049     {
5050       if (!DECL_P (var))
5051         return NULL_TREE;
5052
5053       tag = symbol_mem_tag (var);
5054       if (tag)
5055         return tag;
5056
5057       return var;
5058     }
5059 }
5060
5061 /* Copies the reference information from OLD_REF to NEW_REF.  */
5062
5063 static void
5064 copy_ref_info (tree new_ref, tree old_ref)
5065 {
5066   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5067     copy_mem_ref_info (new_ref, old_ref);
5068   else
5069     {
5070       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5071       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5072     }
5073 }
5074
5075 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5076
5077 static void
5078 rewrite_use_address (struct ivopts_data *data,
5079                      struct iv_use *use, struct iv_cand *cand)
5080 {
5081   aff_tree aff;
5082   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5083   tree ref;
5084   bool ok;
5085
5086   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5087   gcc_assert (ok);
5088   unshare_aff_combination (&aff);
5089
5090   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5091   copy_ref_info (ref, *use->op_p);
5092   *use->op_p = ref;
5093 }
5094
5095 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5096    candidate CAND.  */
5097
5098 static void
5099 rewrite_use_compare (struct ivopts_data *data,
5100                      struct iv_use *use, struct iv_cand *cand)
5101 {
5102   tree comp, *var_p, op, bound;
5103   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5104   enum tree_code compare;
5105   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5106   bool ok;
5107
5108   bound = cp->value;
5109   if (bound)
5110     {
5111       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5112       tree var_type = TREE_TYPE (var);
5113
5114       compare = iv_elimination_compare (data, use);
5115       bound = unshare_expr (fold_convert (var_type, bound));
5116       op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE);
5117
5118       *use->op_p = build2 (compare, boolean_type_node, var, op);
5119       return;
5120     }
5121
5122   /* The induction variable elimination failed; just express the original
5123      giv.  */
5124   comp = get_computation (data->current_loop, use, cand);
5125   gcc_assert (comp != NULL_TREE);
5126
5127   ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5128   gcc_assert (ok);
5129
5130   *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p));
5131 }
5132
5133 /* Rewrites USE using candidate CAND.  */
5134
5135 static void
5136 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5137 {
5138   push_stmt_changes (&use->stmt);
5139
5140   switch (use->type)
5141     {
5142       case USE_NONLINEAR_EXPR:
5143         rewrite_use_nonlinear_expr (data, use, cand);
5144         break;
5145
5146       case USE_ADDRESS:
5147         rewrite_use_address (data, use, cand);
5148         break;
5149
5150       case USE_COMPARE:
5151         rewrite_use_compare (data, use, cand);
5152         break;
5153
5154       default:
5155         gcc_unreachable ();
5156     }
5157
5158   pop_stmt_changes (&use->stmt);
5159 }
5160
5161 /* Rewrite the uses using the selected induction variables.  */
5162
5163 static void
5164 rewrite_uses (struct ivopts_data *data)
5165 {
5166   unsigned i;
5167   struct iv_cand *cand;
5168   struct iv_use *use;
5169
5170   for (i = 0; i < n_iv_uses (data); i++)
5171     {
5172       use = iv_use (data, i);
5173       cand = use->selected;
5174       gcc_assert (cand);
5175
5176       rewrite_use (data, use, cand);
5177     }
5178 }
5179
5180 /* Removes the ivs that are not used after rewriting.  */
5181
5182 static void
5183 remove_unused_ivs (struct ivopts_data *data)
5184 {
5185   unsigned j;
5186   bitmap_iterator bi;
5187
5188   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5189     {
5190       struct version_info *info;
5191
5192       info = ver_info (data, j);
5193       if (info->iv
5194           && !integer_zerop (info->iv->step)
5195           && !info->inv_id
5196           && !info->iv->have_use_for
5197           && !info->preserve_biv)
5198         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5199     }
5200 }
5201
5202 /* Frees data allocated by the optimization of a single loop.  */
5203
5204 static void
5205 free_loop_data (struct ivopts_data *data)
5206 {
5207   unsigned i, j;
5208   bitmap_iterator bi;
5209   tree obj;
5210
5211   if (data->niters)
5212     {
5213       pointer_map_destroy (data->niters);
5214       data->niters = NULL;
5215     }
5216
5217   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5218     {
5219       struct version_info *info;
5220
5221       info = ver_info (data, i);
5222       if (info->iv)
5223         free (info->iv);
5224       info->iv = NULL;
5225       info->has_nonlin_use = false;
5226       info->preserve_biv = false;
5227       info->inv_id = 0;
5228     }
5229   bitmap_clear (data->relevant);
5230   bitmap_clear (data->important_candidates);
5231
5232   for (i = 0; i < n_iv_uses (data); i++)
5233     {
5234       struct iv_use *use = iv_use (data, i);
5235
5236       free (use->iv);
5237       BITMAP_FREE (use->related_cands);
5238       for (j = 0; j < use->n_map_members; j++)
5239         if (use->cost_map[j].depends_on)
5240           BITMAP_FREE (use->cost_map[j].depends_on);
5241       free (use->cost_map);
5242       free (use);
5243     }
5244   VEC_truncate (iv_use_p, data->iv_uses, 0);
5245
5246   for (i = 0; i < n_iv_cands (data); i++)
5247     {
5248       struct iv_cand *cand = iv_cand (data, i);
5249
5250       if (cand->iv)
5251         free (cand->iv);
5252       if (cand->depends_on)
5253         BITMAP_FREE (cand->depends_on);
5254       free (cand);
5255     }
5256   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5257
5258   if (data->version_info_size < num_ssa_names)
5259     {
5260       data->version_info_size = 2 * num_ssa_names;
5261       free (data->version_info);
5262       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5263     }
5264
5265   data->max_inv_id = 0;
5266
5267   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5268     SET_DECL_RTL (obj, NULL_RTX);
5269
5270   VEC_truncate (tree, decl_rtl_to_reset, 0);
5271 }
5272
5273 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5274    loop tree.  */
5275
5276 static void
5277 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5278 {
5279   free_loop_data (data);
5280   free (data->version_info);
5281   BITMAP_FREE (data->relevant);
5282   BITMAP_FREE (data->important_candidates);
5283
5284   VEC_free (tree, heap, decl_rtl_to_reset);
5285   VEC_free (iv_use_p, heap, data->iv_uses);
5286   VEC_free (iv_cand_p, heap, data->iv_candidates);
5287 }
5288
5289 /* Optimizes the LOOP.  Returns true if anything changed.  */
5290
5291 static bool
5292 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5293 {
5294   bool changed = false;
5295   struct iv_ca *iv_ca;
5296   edge exit;
5297
5298   gcc_assert (!data->niters);
5299   data->current_loop = loop;
5300
5301   if (dump_file && (dump_flags & TDF_DETAILS))
5302     {
5303       fprintf (dump_file, "Processing loop %d\n", loop->num);
5304
5305       exit = single_dom_exit (loop);
5306       if (exit)
5307         {
5308           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5309                    exit->src->index, exit->dest->index);
5310           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5311           fprintf (dump_file, "\n");
5312         }
5313
5314       fprintf (dump_file, "\n");
5315     }
5316
5317   /* For each ssa name determines whether it behaves as an induction variable
5318      in some loop.  */
5319   if (!find_induction_variables (data))
5320     goto finish;
5321
5322   /* Finds interesting uses (item 1).  */
5323   find_interesting_uses (data);
5324   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5325     goto finish;
5326
5327   /* Finds candidates for the induction variables (item 2).  */
5328   find_iv_candidates (data);
5329
5330   /* Calculates the costs (item 3, part 1).  */
5331   determine_use_iv_costs (data);
5332   determine_iv_costs (data);
5333   determine_set_costs (data);
5334
5335   /* Find the optimal set of induction variables (item 3, part 2).  */
5336   iv_ca = find_optimal_iv_set (data);
5337   if (!iv_ca)
5338     goto finish;
5339   changed = true;
5340
5341   /* Create the new induction variables (item 4, part 1).  */
5342   create_new_ivs (data, iv_ca);
5343   iv_ca_free (&iv_ca);
5344
5345   /* Rewrite the uses (item 4, part 2).  */
5346   rewrite_uses (data);
5347
5348   /* Remove the ivs that are unused after rewriting.  */
5349   remove_unused_ivs (data);
5350
5351   /* We have changed the structure of induction variables; it might happen
5352      that definitions in the scev database refer to some of them that were
5353      eliminated.  */
5354   scev_reset ();
5355
5356 finish:
5357   free_loop_data (data);
5358
5359   return changed;
5360 }
5361
5362 /* Main entry point.  Optimizes induction variables in loops.  */
5363
5364 void
5365 tree_ssa_iv_optimize (void)
5366 {
5367   struct loop *loop;
5368   struct ivopts_data data;
5369   loop_iterator li;
5370
5371   tree_ssa_iv_optimize_init (&data);
5372
5373   /* Optimize the loops starting with the innermost ones.  */
5374   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5375     {
5376       if (dump_file && (dump_flags & TDF_DETAILS))
5377         flow_loop_dump (loop, dump_file, NULL, 1);
5378
5379       tree_ssa_iv_optimize_loop (&data, loop);
5380     }
5381
5382   tree_ssa_iv_optimize_finalize (&data);
5383 }