gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "rtl.h"
  70 #include "tm_p.h"
  71 #include "hard-reg-set.h"
  72 #include "basic-block.h"
  73 #include "output.h"
  74 #include "diagnostic.h"
  75 #include "tree-flow.h"
  76 #include "tree-dump.h"
  77 #include "timevar.h"
  78 #include "cfgloop.h"
  79 #include "varray.h"
  80 #include "expr.h"
  81 #include "tree-pass.h"
  82 #include "ggc.h"
  83 #include "insn-config.h"
  84 #include "recog.h"
  85 #include "pointer-set.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92 #include "tree-affine.h"
  93 #include "target.h"
  94
  95 /* The infinite cost.  */
  96 #define INFTY 10000000
  97
  98 /* The expected number of loop iterations.  TODO -- use profiling instead of
  99    this.  */
 100 #define AVG_LOOP_NITER(LOOP) 5
 101
 102
 103 /* Representation of the induction variable.  */
 104 struct iv
 105 {
 106   tree base;            /* Initial value of the iv.  */
 107   tree base_object;     /* A memory object to that the induction variable points.  */
 108   tree step;            /* Step of the iv (constant only).  */
 109   tree ssa_name;        /* The ssa name with the value.  */
 110   bool biv_p;           /* Is it a biv?  */
 111   bool have_use_for;    /* Do we already have a use for it?  */
 112   unsigned use_id;      /* The identifier in the use if it is the case.  */
 113 };
 114
 115 /* Per-ssa version information (induction variable descriptions, etc.).  */
 116 struct version_info
 117 {
 118   tree name;            /* The ssa name.  */
 119   struct iv *iv;        /* Induction variable description.  */
 120   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 121                            an expression that is not an induction variable.  */
 122   unsigned inv_id;      /* Id of an invariant.  */
 123   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 124 };
 125
 126 /* Types of uses.  */
 127 enum use_type
 128 {
 129   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 130   USE_ADDRESS,          /* Use in an address.  */
 131   USE_COMPARE           /* Use is a compare.  */
 132 };
 133
 134 /* Cost of a computation.  */
 135 typedef struct
 136 {
 137   unsigned cost;        /* The runtime cost.  */
 138   unsigned complexity;  /* The estimate of the complexity of the code for
 139                            the computation (in no concrete units --
 140                            complexity field should be larger for more
 141                            complex expressions and addressing modes).  */
 142 } comp_cost;
 143
 144 static const comp_cost zero_cost = {0, 0};
 145 static const comp_cost infinite_cost = {INFTY, INFTY};
 146
 147 /* The candidate - cost pair.  */
 148 struct cost_pair
 149 {
 150   struct iv_cand *cand; /* The candidate.  */
 151   comp_cost cost;       /* The cost.  */
 152   bitmap depends_on;    /* The list of invariants that have to be
 153                            preserved.  */
 154   tree value;           /* For final value elimination, the expression for
 155                            the final value of the iv.  For iv elimination,
 156                            the new bound to compare with.  */
 157 };
 158
 159 /* Use.  */
 160 struct iv_use
 161 {
 162   unsigned id;          /* The id of the use.  */
 163   enum use_type type;   /* Type of the use.  */
 164   struct iv *iv;        /* The induction variable it is based on.  */
 165   tree stmt;            /* Statement in that it occurs.  */
 166   tree *op_p;           /* The place where it occurs.  */
 167   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 168                            important ones.  */
 169
 170   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 171   struct cost_pair *cost_map;
 172                         /* The costs wrto the iv candidates.  */
 173
 174   struct iv_cand *selected;
 175                         /* The selected candidate.  */
 176 };
 177
 178 /* The position where the iv is computed.  */
 179 enum iv_position
 180 {
 181   IP_NORMAL,            /* At the end, just before the exit condition.  */
 182   IP_END,               /* At the end of the latch block.  */
 183   IP_ORIGINAL           /* The original biv.  */
 184 };
 185
 186 /* The induction variable candidate.  */
 187 struct iv_cand
 188 {
 189   unsigned id;          /* The number of the candidate.  */
 190   bool important;       /* Whether this is an "important" candidate, i.e. such
 191                            that it should be considered by all uses.  */
 192   enum iv_position pos; /* Where it is computed.  */
 193   tree incremented_at;  /* For original biv, the statement where it is
 194                            incremented.  */
 195   tree var_before;      /* The variable used for it before increment.  */
 196   tree var_after;       /* The variable used for it after increment.  */
 197   struct iv *iv;        /* The value of the candidate.  NULL for
 198                            "pseudocandidate" used to indicate the possibility
 199                            to replace the final value of an iv by direct
 200                            computation of the value.  */
 201   unsigned cost;        /* Cost of the candidate.  */
 202   bitmap depends_on;    /* The list of invariants that are used in step of the
 203                            biv.  */
 204 };
 205
 206 /* The data used by the induction variable optimizations.  */
 207
 208 typedef struct iv_use *iv_use_p;
 209 DEF_VEC_P(iv_use_p);
 210 DEF_VEC_ALLOC_P(iv_use_p,heap);
 211
 212 typedef struct iv_cand *iv_cand_p;
 213 DEF_VEC_P(iv_cand_p);
 214 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 215
 216 struct ivopts_data
 217 {
 218   /* The currently optimized loop.  */
 219   struct loop *current_loop;
 220
 221   /* Number of registers used in it.  */
 222   unsigned regs_used;
 223
 224   /* Numbers of iterations for all exits of the current loop.  */
 225   struct pointer_map_t *niters;
 226
 227   /* The size of version_info array allocated.  */
 228   unsigned version_info_size;
 229
 230   /* The array of information for the ssa names.  */
 231   struct version_info *version_info;
 232
 233   /* The bitmap of indices in version_info whose value was changed.  */
 234   bitmap relevant;
 235
 236   /* The maximum invariant id.  */
 237   unsigned max_inv_id;
 238
 239   /* The uses of induction variables.  */
 240   VEC(iv_use_p,heap) *iv_uses;
 241
 242   /* The candidates.  */
 243   VEC(iv_cand_p,heap) *iv_candidates;
 244
 245   /* A bitmap of important candidates.  */
 246   bitmap important_candidates;
 247
 248   /* Whether to consider just related and important candidates when replacing a
 249      use.  */
 250   bool consider_all_candidates;
 251 };
 252
 253 /* An assignment of iv candidates to uses.  */
 254
 255 struct iv_ca
 256 {
 257   /* The number of uses covered by the assignment.  */
 258   unsigned upto;
 259
 260   /* Number of uses that cannot be expressed by the candidates in the set.  */
 261   unsigned bad_uses;
 262
 263   /* Candidate assigned to a use, together with the related costs.  */
 264   struct cost_pair **cand_for_use;
 265
 266   /* Number of times each candidate is used.  */
 267   unsigned *n_cand_uses;
 268
 269   /* The candidates used.  */
 270   bitmap cands;
 271
 272   /* The number of candidates in the set.  */
 273   unsigned n_cands;
 274
 275   /* Total number of registers needed.  */
 276   unsigned n_regs;
 277
 278   /* Total cost of expressing uses.  */
 279   comp_cost cand_use_cost;
 280
 281   /* Total cost of candidates.  */
 282   unsigned cand_cost;
 283
 284   /* Number of times each invariant is used.  */
 285   unsigned *n_invariant_uses;
 286
 287   /* Total cost of the assignment.  */
 288   comp_cost cost;
 289 };
 290
 291 /* Difference of two iv candidate assignments.  */
 292
 293 struct iv_ca_delta
 294 {
 295   /* Changed use.  */
 296   struct iv_use *use;
 297
 298   /* An old assignment (for rollback purposes).  */
 299   struct cost_pair *old_cp;
 300
 301   /* A new assignment.  */
 302   struct cost_pair *new_cp;
 303
 304   /* Next change in the list.  */
 305   struct iv_ca_delta *next_change;
 306 };
 307
 308 /* Bound on number of candidates below that all candidates are considered.  */
 309
 310 #define CONSIDER_ALL_CANDIDATES_BOUND \
 311   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 312
 313 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 314    optimizing such a loop would help, and it would take ages).  */
 315
 316 #define MAX_CONSIDERED_USES \
 317   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 318
 319 /* If there are at most this number of ivs in the set, try removing unnecessary
 320    ivs from the set always.  */
 321
 322 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 323   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 324
 325 /* The list of trees for that the decl_rtl field must be reset is stored
 326    here.  */
 327
 328 static VEC(tree,heap) *decl_rtl_to_reset;
 329
 330 /* Number of uses recorded in DATA.  */
 331
 332 static inline unsigned
 333 n_iv_uses (struct ivopts_data *data)
 334 {
 335   return VEC_length (iv_use_p, data->iv_uses);
 336 }
 337
 338 /* Ith use recorded in DATA.  */
 339
 340 static inline struct iv_use *
 341 iv_use (struct ivopts_data *data, unsigned i)
 342 {
 343   return VEC_index (iv_use_p, data->iv_uses, i);
 344 }
 345
 346 /* Number of candidates recorded in DATA.  */
 347
 348 static inline unsigned
 349 n_iv_cands (struct ivopts_data *data)
 350 {
 351   return VEC_length (iv_cand_p, data->iv_candidates);
 352 }
 353
 354 /* Ith candidate recorded in DATA.  */
 355
 356 static inline struct iv_cand *
 357 iv_cand (struct ivopts_data *data, unsigned i)
 358 {
 359   return VEC_index (iv_cand_p, data->iv_candidates, i);
 360 }
 361
 362 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 363
 364 edge
 365 single_dom_exit (struct loop *loop)
 366 {
 367   edge exit = single_exit (loop);
 368
 369   if (!exit)
 370     return NULL;
 371
 372   if (!just_once_each_iteration_p (loop, exit->src))
 373     return NULL;
 374
 375   return exit;
 376 }
 377
 378 /* Dumps information about the induction variable IV to FILE.  */
 379
 380 extern void dump_iv (FILE *, struct iv *);
 381 void
 382 dump_iv (FILE *file, struct iv *iv)
 383 {
 384   if (iv->ssa_name)
 385     {
 386       fprintf (file, "ssa name ");
 387       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 388       fprintf (file, "\n");
 389     }
 390
 391   fprintf (file, "  type ");
 392   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 393   fprintf (file, "\n");
 394
 395   if (iv->step)
 396     {
 397       fprintf (file, "  base ");
 398       print_generic_expr (file, iv->base, TDF_SLIM);
 399       fprintf (file, "\n");
 400
 401       fprintf (file, "  step ");
 402       print_generic_expr (file, iv->step, TDF_SLIM);
 403       fprintf (file, "\n");
 404     }
 405   else
 406     {
 407       fprintf (file, "  invariant ");
 408       print_generic_expr (file, iv->base, TDF_SLIM);
 409       fprintf (file, "\n");
 410     }
 411
 412   if (iv->base_object)
 413     {
 414       fprintf (file, "  base object ");
 415       print_generic_expr (file, iv->base_object, TDF_SLIM);
 416       fprintf (file, "\n");
 417     }
 418
 419   if (iv->biv_p)
 420     fprintf (file, "  is a biv\n");
 421 }
 422
 423 /* Dumps information about the USE to FILE.  */
 424
 425 extern void dump_use (FILE *, struct iv_use *);
 426 void
 427 dump_use (FILE *file, struct iv_use *use)
 428 {
 429   fprintf (file, "use %d\n", use->id);
 430
 431   switch (use->type)
 432     {
 433     case USE_NONLINEAR_EXPR:
 434       fprintf (file, "  generic\n");
 435       break;
 436
 437     case USE_ADDRESS:
 438       fprintf (file, "  address\n");
 439       break;
 440
 441     case USE_COMPARE:
 442       fprintf (file, "  compare\n");
 443       break;
 444
 445     default:
 446       gcc_unreachable ();
 447     }
 448
 449   fprintf (file, "  in statement ");
 450   print_generic_expr (file, use->stmt, TDF_SLIM);
 451   fprintf (file, "\n");
 452
 453   fprintf (file, "  at position ");
 454   if (use->op_p)
 455     print_generic_expr (file, *use->op_p, TDF_SLIM);
 456   fprintf (file, "\n");
 457
 458   dump_iv (file, use->iv);
 459
 460   if (use->related_cands)
 461     {
 462       fprintf (file, "  related candidates ");
 463       dump_bitmap (file, use->related_cands);
 464     }
 465 }
 466
 467 /* Dumps information about the uses to FILE.  */
 468
 469 extern void dump_uses (FILE *, struct ivopts_data *);
 470 void
 471 dump_uses (FILE *file, struct ivopts_data *data)
 472 {
 473   unsigned i;
 474   struct iv_use *use;
 475
 476   for (i = 0; i < n_iv_uses (data); i++)
 477     {
 478       use = iv_use (data, i);
 479
 480       dump_use (file, use);
 481       fprintf (file, "\n");
 482     }
 483 }
 484
 485 /* Dumps information about induction variable candidate CAND to FILE.  */
 486
 487 extern void dump_cand (FILE *, struct iv_cand *);
 488 void
 489 dump_cand (FILE *file, struct iv_cand *cand)
 490 {
 491   struct iv *iv = cand->iv;
 492
 493   fprintf (file, "candidate %d%s\n",
 494            cand->id, cand->important ? " (important)" : "");
 495
 496   if (cand->depends_on)
 497     {
 498       fprintf (file, "  depends on ");
 499       dump_bitmap (file, cand->depends_on);
 500     }
 501
 502   if (!iv)
 503     {
 504       fprintf (file, "  final value replacement\n");
 505       return;
 506     }
 507
 508   switch (cand->pos)
 509     {
 510     case IP_NORMAL:
 511       fprintf (file, "  incremented before exit test\n");
 512       break;
 513
 514     case IP_END:
 515       fprintf (file, "  incremented at end\n");
 516       break;
 517
 518     case IP_ORIGINAL:
 519       fprintf (file, "  original biv\n");
 520       break;
 521     }
 522
 523   dump_iv (file, iv);
 524 }
 525
 526 /* Returns the info for ssa version VER.  */
 527
 528 static inline struct version_info *
 529 ver_info (struct ivopts_data *data, unsigned ver)
 530 {
 531   return data->version_info + ver;
 532 }
 533
 534 /* Returns the info for ssa name NAME.  */
 535
 536 static inline struct version_info *
 537 name_info (struct ivopts_data *data, tree name)
 538 {
 539   return ver_info (data, SSA_NAME_VERSION (name));
 540 }
 541
 542 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 543    emitted in LOOP.  */
 544
 545 static bool
 546 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 547 {
 548   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 549
 550   gcc_assert (bb);
 551
 552   if (sbb == loop->latch)
 553     return true;
 554
 555   if (sbb != bb)
 556     return false;
 557
 558   return stmt == last_stmt (bb);
 559 }
 560
 561 /* Returns true if STMT if after the place where the original induction
 562    variable CAND is incremented.  */
 563
 564 static bool
 565 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 566 {
 567   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 568   basic_block stmt_bb = bb_for_stmt (stmt);
 569   block_stmt_iterator bsi;
 570
 571   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 572     return false;
 573
 574   if (stmt_bb != cand_bb)
 575     return true;
 576
 577   /* Scan the block from the end, since the original ivs are usually
 578      incremented at the end of the loop body.  */
 579   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 580     {
 581       if (bsi_stmt (bsi) == cand->incremented_at)
 582         return false;
 583       if (bsi_stmt (bsi) == stmt)
 584         return true;
 585     }
 586 }
 587
 588 /* Returns true if STMT if after the place where the induction variable
 589    CAND is incremented in LOOP.  */
 590
 591 static bool
 592 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 593 {
 594   switch (cand->pos)
 595     {
 596     case IP_END:
 597       return false;
 598
 599     case IP_NORMAL:
 600       return stmt_after_ip_normal_pos (loop, stmt);
 601
 602     case IP_ORIGINAL:
 603       return stmt_after_ip_original_pos (cand, stmt);
 604
 605     default:
 606       gcc_unreachable ();
 607     }
 608 }
 609
 610 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 611
 612 static bool
 613 abnormal_ssa_name_p (tree exp)
 614 {
 615   if (!exp)
 616     return false;
 617
 618   if (TREE_CODE (exp) != SSA_NAME)
 619     return false;
 620
 621   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 622 }
 623
 624 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 625    abnormal phi node.  Callback for for_each_index.  */
 626
 627 static bool
 628 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 629                                   void *data ATTRIBUTE_UNUSED)
 630 {
 631   if (TREE_CODE (base) == ARRAY_REF)
 632     {
 633       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 634         return false;
 635       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 636         return false;
 637     }
 638
 639   return !abnormal_ssa_name_p (*index);
 640 }
 641
 642 /* Returns true if EXPR contains a ssa name that occurs in an
 643    abnormal phi node.  */
 644
 645 bool
 646 contains_abnormal_ssa_name_p (tree expr)
 647 {
 648   enum tree_code code;
 649   enum tree_code_class codeclass;
 650
 651   if (!expr)
 652     return false;
 653
 654   code = TREE_CODE (expr);
 655   codeclass = TREE_CODE_CLASS (code);
 656
 657   if (code == SSA_NAME)
 658     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 659
 660   if (code == INTEGER_CST
 661       || is_gimple_min_invariant (expr))
 662     return false;
 663
 664   if (code == ADDR_EXPR)
 665     return !for_each_index (&TREE_OPERAND (expr, 0),
 666                             idx_contains_abnormal_ssa_name_p,
 667                             NULL);
 668
 669   switch (codeclass)
 670     {
 671     case tcc_binary:
 672     case tcc_comparison:
 673       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 674         return true;
 675
 676       /* Fallthru.  */
 677     case tcc_unary:
 678       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 679         return true;
 680
 681       break;
 682
 683     default:
 684       gcc_unreachable ();
 685     }
 686
 687   return false;
 688 }
 689
 690 /*  Returns tree describing number of iterations determined from
 691     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 692
 693 static tree
 694 niter_for_exit (struct ivopts_data *data, edge exit)
 695 {
 696   struct tree_niter_desc desc;
 697   tree niter;
 698   void **slot;
 699
 700   if (!data->niters)
 701     {
 702       data->niters = pointer_map_create ();
 703       slot = NULL;
 704     }
 705   else
 706     slot = pointer_map_contains (data->niters, exit);
 707
 708   if (!slot)
 709     {
 710       /* Try to determine number of iterations.  We must know it
 711          unconditionally (i.e., without possibility of # of iterations
 712          being zero).  Also, we cannot safely work with ssa names that
 713          appear in phi nodes on abnormal edges, so that we do not create
 714          overlapping life ranges for them (PR 27283).  */
 715       if (number_of_iterations_exit (data->current_loop,
 716                                      exit, &desc, true)
 717           && integer_zerop (desc.may_be_zero)
 718           && !contains_abnormal_ssa_name_p (desc.niter))
 719         niter = desc.niter;
 720       else
 721         niter = NULL_TREE;
 722
 723       *pointer_map_insert (data->niters, exit) = niter;
 724     }
 725   else
 726     niter = (tree) *slot;
 727
 728   return niter;
 729 }
 730
 731 /* Returns tree describing number of iterations determined from
 732    single dominating exit of DATA->current_loop, or NULL if something
 733    goes wrong.  */
 734
 735 static tree
 736 niter_for_single_dom_exit (struct ivopts_data *data)
 737 {
 738   edge exit = single_dom_exit (data->current_loop);
 739
 740   if (!exit)
 741     return NULL;
 742
 743   return niter_for_exit (data, exit);
 744 }
 745
 746 /* Initializes data structures used by the iv optimization pass, stored
 747    in DATA.  */
 748
 749 static void
 750 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 751 {
 752   data->version_info_size = 2 * num_ssa_names;
 753   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 754   data->relevant = BITMAP_ALLOC (NULL);
 755   data->important_candidates = BITMAP_ALLOC (NULL);
 756   data->max_inv_id = 0;
 757   data->niters = NULL;
 758   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 759   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 760   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 761 }
 762
 763 /* Returns a memory object to that EXPR points.  In case we are able to
 764    determine that it does not point to any such object, NULL is returned.  */
 765
 766 static tree
 767 determine_base_object (tree expr)
 768 {
 769   enum tree_code code = TREE_CODE (expr);
 770   tree base, obj;
 771
 772   /* If this is a pointer casted to any type, we need to determine
 773      the base object for the pointer; so handle conversions before
 774      throwing away non-pointer expressions.  */
 775   if (TREE_CODE (expr) == NOP_EXPR
 776       || TREE_CODE (expr) == CONVERT_EXPR)
 777     return determine_base_object (TREE_OPERAND (expr, 0));
 778
 779   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 780     return NULL_TREE;
 781
 782   switch (code)
 783     {
 784     case INTEGER_CST:
 785       return NULL_TREE;
 786
 787     case ADDR_EXPR:
 788       obj = TREE_OPERAND (expr, 0);
 789       base = get_base_address (obj);
 790
 791       if (!base)
 792         return expr;
 793
 794       if (TREE_CODE (base) == INDIRECT_REF)
 795         return determine_base_object (TREE_OPERAND (base, 0));
 796
 797       return fold_convert (ptr_type_node,
 798                            build_fold_addr_expr (base));
 799
 800     case POINTER_PLUS_EXPR:
 801       return determine_base_object (TREE_OPERAND (expr, 0));
 802
 803     case PLUS_EXPR:
 804     case MINUS_EXPR:
 805       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 806       gcc_unreachable ();
 807
 808     default:
 809       return fold_convert (ptr_type_node, expr);
 810     }
 811 }
 812
 813 /* Allocates an induction variable with given initial value BASE and step STEP
 814    for loop LOOP.  */
 815
 816 static struct iv *
 817 alloc_iv (tree base, tree step)
 818 {
 819   struct iv *iv = XCNEW (struct iv);
 820   gcc_assert (step != NULL_TREE);
 821
 822   iv->base = base;
 823   iv->base_object = determine_base_object (base);
 824   iv->step = step;
 825   iv->biv_p = false;
 826   iv->have_use_for = false;
 827   iv->use_id = 0;
 828   iv->ssa_name = NULL_TREE;
 829
 830   return iv;
 831 }
 832
 833 /* Sets STEP and BASE for induction variable IV.  */
 834
 835 static void
 836 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 837 {
 838   struct version_info *info = name_info (data, iv);
 839
 840   gcc_assert (!info->iv);
 841
 842   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 843   info->iv = alloc_iv (base, step);
 844   info->iv->ssa_name = iv;
 845 }
 846
 847 /* Finds induction variable declaration for VAR.  */
 848
 849 static struct iv *
 850 get_iv (struct ivopts_data *data, tree var)
 851 {
 852   basic_block bb;
 853   tree type = TREE_TYPE (var);
 854
 855   if (!POINTER_TYPE_P (type)
 856       && !INTEGRAL_TYPE_P (type))
 857     return NULL;
 858
 859   if (!name_info (data, var)->iv)
 860     {
 861       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 862
 863       if (!bb
 864           || !flow_bb_inside_loop_p (data->current_loop, bb))
 865         set_iv (data, var, var, build_int_cst (type, 0));
 866     }
 867
 868   return name_info (data, var)->iv;
 869 }
 870
 871 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 872    not define a simple affine biv with nonzero step.  */
 873
 874 static tree
 875 determine_biv_step (tree phi)
 876 {
 877   struct loop *loop = bb_for_stmt (phi)->loop_father;
 878   tree name = PHI_RESULT (phi);
 879   affine_iv iv;
 880
 881   if (!is_gimple_reg (name))
 882     return NULL_TREE;
 883
 884   if (!simple_iv (loop, phi, name, &iv, true))
 885     return NULL_TREE;
 886
 887   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 888 }
 889
 890 /* Finds basic ivs.  */
 891
 892 static bool
 893 find_bivs (struct ivopts_data *data)
 894 {
 895   tree phi, step, type, base;
 896   bool found = false;
 897   struct loop *loop = data->current_loop;
 898
 899   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 900     {
 901       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 902         continue;
 903
 904       step = determine_biv_step (phi);
 905       if (!step)
 906         continue;
 907
 908       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 909       base = expand_simple_operations (base);
 910       if (contains_abnormal_ssa_name_p (base)
 911           || contains_abnormal_ssa_name_p (step))
 912         continue;
 913
 914       type = TREE_TYPE (PHI_RESULT (phi));
 915       base = fold_convert (type, base);
 916       if (step)
 917         step = fold_convert (type, step);
 918
 919       set_iv (data, PHI_RESULT (phi), base, step);
 920       found = true;
 921     }
 922
 923   return found;
 924 }
 925
 926 /* Marks basic ivs.  */
 927
 928 static void
 929 mark_bivs (struct ivopts_data *data)
 930 {
 931   tree phi, var;
 932   struct iv *iv, *incr_iv;
 933   struct loop *loop = data->current_loop;
 934   basic_block incr_bb;
 935
 936   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 937     {
 938       iv = get_iv (data, PHI_RESULT (phi));
 939       if (!iv)
 940         continue;
 941
 942       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 943       incr_iv = get_iv (data, var);
 944       if (!incr_iv)
 945         continue;
 946
 947       /* If the increment is in the subloop, ignore it.  */
 948       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 949       if (incr_bb->loop_father != data->current_loop
 950           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 951         continue;
 952
 953       iv->biv_p = true;
 954       incr_iv->biv_p = true;
 955     }
 956 }
 957
 958 /* Checks whether STMT defines a linear induction variable and stores its
 959    parameters to IV.  */
 960
 961 static bool
 962 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 963 {
 964   tree lhs;
 965   struct loop *loop = data->current_loop;
 966
 967   iv->base = NULL_TREE;
 968   iv->step = NULL_TREE;
 969
 970   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 971     return false;
 972
 973   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 974   if (TREE_CODE (lhs) != SSA_NAME)
 975     return false;
 976
 977   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
 978     return false;
 979   iv->base = expand_simple_operations (iv->base);
 980
 981   if (contains_abnormal_ssa_name_p (iv->base)
 982       || contains_abnormal_ssa_name_p (iv->step))
 983     return false;
 984
 985   return true;
 986 }
 987
 988 /* Finds general ivs in statement STMT.  */
 989
 990 static void
 991 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
 992 {
 993   affine_iv iv;
 994
 995   if (!find_givs_in_stmt_scev (data, stmt, &iv))
 996     return;
 997
 998   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
 999 }
1000
1001 /* Finds general ivs in basic block BB.  */
1002
1003 static void
1004 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1005 {
1006   block_stmt_iterator bsi;
1007
1008   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1009     find_givs_in_stmt (data, bsi_stmt (bsi));
1010 }
1011
1012 /* Finds general ivs.  */
1013
1014 static void
1015 find_givs (struct ivopts_data *data)
1016 {
1017   struct loop *loop = data->current_loop;
1018   basic_block *body = get_loop_body_in_dom_order (loop);
1019   unsigned i;
1020
1021   for (i = 0; i < loop->num_nodes; i++)
1022     find_givs_in_bb (data, body[i]);
1023   free (body);
1024 }
1025
1026 /* For each ssa name defined in LOOP determines whether it is an induction
1027    variable and if so, its initial value and step.  */
1028
1029 static bool
1030 find_induction_variables (struct ivopts_data *data)
1031 {
1032   unsigned i;
1033   bitmap_iterator bi;
1034
1035   if (!find_bivs (data))
1036     return false;
1037
1038   find_givs (data);
1039   mark_bivs (data);
1040
1041   if (dump_file && (dump_flags & TDF_DETAILS))
1042     {
1043       tree niter = niter_for_single_dom_exit (data);
1044
1045       if (niter)
1046         {
1047           fprintf (dump_file, "  number of iterations ");
1048           print_generic_expr (dump_file, niter, TDF_SLIM);
1049           fprintf (dump_file, "\n\n");
1050         };
1051
1052       fprintf (dump_file, "Induction variables:\n\n");
1053
1054       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1055         {
1056           if (ver_info (data, i)->iv)
1057             dump_iv (dump_file, ver_info (data, i)->iv);
1058         }
1059     }
1060
1061   return true;
1062 }
1063
1064 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1065
1066 static struct iv_use *
1067 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1068             tree stmt, enum use_type use_type)
1069 {
1070   struct iv_use *use = XCNEW (struct iv_use);
1071
1072   use->id = n_iv_uses (data);
1073   use->type = use_type;
1074   use->iv = iv;
1075   use->stmt = stmt;
1076   use->op_p = use_p;
1077   use->related_cands = BITMAP_ALLOC (NULL);
1078
1079   /* To avoid showing ssa name in the dumps, if it was not reset by the
1080      caller.  */
1081   iv->ssa_name = NULL_TREE;
1082
1083   if (dump_file && (dump_flags & TDF_DETAILS))
1084     dump_use (dump_file, use);
1085
1086   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1087
1088   return use;
1089 }
1090
1091 /* Checks whether OP is a loop-level invariant and if so, records it.
1092    NONLINEAR_USE is true if the invariant is used in a way we do not
1093    handle specially.  */
1094
1095 static void
1096 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1097 {
1098   basic_block bb;
1099   struct version_info *info;
1100
1101   if (TREE_CODE (op) != SSA_NAME
1102       || !is_gimple_reg (op))
1103     return;
1104
1105   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1106   if (bb
1107       && flow_bb_inside_loop_p (data->current_loop, bb))
1108     return;
1109
1110   info = name_info (data, op);
1111   info->name = op;
1112   info->has_nonlin_use |= nonlinear_use;
1113   if (!info->inv_id)
1114     info->inv_id = ++data->max_inv_id;
1115   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1116 }
1117
1118 /* Checks whether the use OP is interesting and if so, records it.  */
1119
1120 static struct iv_use *
1121 find_interesting_uses_op (struct ivopts_data *data, tree op)
1122 {
1123   struct iv *iv;
1124   struct iv *civ;
1125   tree stmt;
1126   struct iv_use *use;
1127
1128   if (TREE_CODE (op) != SSA_NAME)
1129     return NULL;
1130
1131   iv = get_iv (data, op);
1132   if (!iv)
1133     return NULL;
1134
1135   if (iv->have_use_for)
1136     {
1137       use = iv_use (data, iv->use_id);
1138
1139       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1140       return use;
1141     }
1142
1143   if (integer_zerop (iv->step))
1144     {
1145       record_invariant (data, op, true);
1146       return NULL;
1147     }
1148   iv->have_use_for = true;
1149
1150   civ = XNEW (struct iv);
1151   *civ = *iv;
1152
1153   stmt = SSA_NAME_DEF_STMT (op);
1154   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1155               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1156
1157   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1158   iv->use_id = use->id;
1159
1160   return use;
1161 }
1162
1163 /* Given a condition *COND_P, checks whether it is a compare of an induction
1164    variable and an invariant.  If this is the case, CONTROL_VAR is set
1165    to location of the iv, BOUND to the location of the invariant,
1166    IV_VAR and IV_BOUND are set to the corresponding induction variable
1167    descriptions, and true is returned.  If this is not the case,
1168    CONTROL_VAR and BOUND are set to the arguments of the condition and
1169    false is returned.  */
1170
1171 static bool
1172 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1173                        tree **control_var, tree **bound,
1174                        struct iv **iv_var, struct iv **iv_bound)
1175 {
1176   /* The nodes returned when COND has just one operand.  Note that you should
1177      not modify anything in BOUND or IV_BOUND because of this.  */
1178   static struct iv const_iv;
1179   static tree zero;
1180   tree cond = *cond_p;
1181   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1182   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1183   bool ret = false;
1184
1185   zero = integer_zero_node;
1186   const_iv.step = integer_zero_node;
1187
1188   if (TREE_CODE (cond) == SSA_NAME)
1189     {
1190       op0 = cond_p;
1191       iv0 = get_iv (data, cond);
1192       ret = (iv0 && !integer_zerop (iv0->step));
1193       goto end;
1194     }
1195
1196   if (!COMPARISON_CLASS_P (cond))
1197     {
1198       op0 = cond_p;
1199       goto end;
1200     }
1201
1202   op0 = &TREE_OPERAND (cond, 0);
1203   op1 = &TREE_OPERAND (cond, 1);
1204   if (TREE_CODE (*op0) == SSA_NAME)
1205     iv0 = get_iv (data, *op0);
1206   if (TREE_CODE (*op1) == SSA_NAME)
1207     iv1 = get_iv (data, *op1);
1208
1209   /* Exactly one of the compared values must be an iv, and the other one must
1210      be an invariant.  */
1211   if (!iv0 || !iv1)
1212     goto end;
1213
1214   if (integer_zerop (iv0->step))
1215     {
1216       /* Control variable may be on the other side.  */
1217       tmp_op = op0; op0 = op1; op1 = tmp_op;
1218       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1219     }
1220   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1221
1222 end:
1223   if (control_var)
1224     *control_var = op0;;
1225   if (iv_var)
1226     *iv_var = iv0;;
1227   if (bound)
1228     *bound = op1;
1229   if (iv_bound)
1230     *iv_bound = iv1;
1231
1232   return ret;
1233 }
1234
1235 /* Checks whether the condition *COND_P in STMT is interesting
1236    and if so, records it.  */
1237
1238 static void
1239 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1240 {
1241   tree *var_p, *bound_p;
1242   struct iv *var_iv, *civ;
1243
1244   if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1245     {
1246       find_interesting_uses_op (data, *var_p);
1247       find_interesting_uses_op (data, *bound_p);
1248       return;
1249     }
1250
1251   civ = XNEW (struct iv);
1252   *civ = *var_iv;
1253   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1254 }
1255
1256 /* Returns true if expression EXPR is obviously invariant in LOOP,
1257    i.e. if all its operands are defined outside of the LOOP.  */
1258
1259 bool
1260 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1261 {
1262   basic_block def_bb;
1263   unsigned i, len;
1264
1265   if (is_gimple_min_invariant (expr))
1266     return true;
1267
1268   if (TREE_CODE (expr) == SSA_NAME)
1269     {
1270       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1271       if (def_bb
1272           && flow_bb_inside_loop_p (loop, def_bb))
1273         return false;
1274
1275       return true;
1276     }
1277
1278   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1279     return false;
1280
1281   len = TREE_OPERAND_LENGTH (expr);
1282   for (i = 0; i < len; i++)
1283     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1284       return false;
1285
1286   return true;
1287 }
1288
1289 /* Cumulates the steps of indices into DATA and replaces their values with the
1290    initial ones.  Returns false when the value of the index cannot be determined.
1291    Callback for for_each_index.  */
1292
1293 struct ifs_ivopts_data
1294 {
1295   struct ivopts_data *ivopts_data;
1296   tree stmt;
1297   tree step;
1298 };
1299
1300 static bool
1301 idx_find_step (tree base, tree *idx, void *data)
1302 {
1303   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1304   struct iv *iv;
1305   tree step, iv_base, iv_step, lbound, off;
1306   struct loop *loop = dta->ivopts_data->current_loop;
1307
1308   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1309       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1310     return false;
1311
1312   /* If base is a component ref, require that the offset of the reference
1313      be invariant.  */
1314   if (TREE_CODE (base) == COMPONENT_REF)
1315     {
1316       off = component_ref_field_offset (base);
1317       return expr_invariant_in_loop_p (loop, off);
1318     }
1319
1320   /* If base is array, first check whether we will be able to move the
1321      reference out of the loop (in order to take its address in strength
1322      reduction).  In order for this to work we need both lower bound
1323      and step to be loop invariants.  */
1324   if (TREE_CODE (base) == ARRAY_REF)
1325     {
1326       step = array_ref_element_size (base);
1327       lbound = array_ref_low_bound (base);
1328
1329       if (!expr_invariant_in_loop_p (loop, step)
1330           || !expr_invariant_in_loop_p (loop, lbound))
1331         return false;
1332     }
1333
1334   if (TREE_CODE (*idx) != SSA_NAME)
1335     return true;
1336
1337   iv = get_iv (dta->ivopts_data, *idx);
1338   if (!iv)
1339     return false;
1340
1341   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1342           *&x[0], which is not folded and does not trigger the
1343           ARRAY_REF path below.  */
1344   *idx = iv->base;
1345
1346   if (integer_zerop (iv->step))
1347     return true;
1348
1349   if (TREE_CODE (base) == ARRAY_REF)
1350     {
1351       step = array_ref_element_size (base);
1352
1353       /* We only handle addresses whose step is an integer constant.  */
1354       if (TREE_CODE (step) != INTEGER_CST)
1355         return false;
1356     }
1357   else
1358     /* The step for pointer arithmetics already is 1 byte.  */
1359     step = build_int_cst (sizetype, 1);
1360
1361   iv_base = iv->base;
1362   iv_step = iv->step;
1363   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1364                             sizetype, &iv_base, &iv_step, dta->stmt,
1365                             false))
1366     {
1367       /* The index might wrap.  */
1368       return false;
1369     }
1370
1371   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1372   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1373
1374   return true;
1375 }
1376
1377 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1378    object is passed to it in DATA.  */
1379
1380 static bool
1381 idx_record_use (tree base, tree *idx,
1382                 void *vdata)
1383 {
1384   struct ivopts_data *data = (struct ivopts_data *) vdata;
1385   find_interesting_uses_op (data, *idx);
1386   if (TREE_CODE (base) == ARRAY_REF)
1387     {
1388       find_interesting_uses_op (data, array_ref_element_size (base));
1389       find_interesting_uses_op (data, array_ref_low_bound (base));
1390     }
1391   return true;
1392 }
1393
1394 /* If we can prove that TOP = cst * BOT for some constant cst,
1395    store cst to MUL and return true.  Otherwise return false.
1396    The returned value is always sign-extended, regardless of the
1397    signedness of TOP and BOT.  */
1398
1399 static bool
1400 constant_multiple_of (tree top, tree bot, double_int *mul)
1401 {
1402   tree mby;
1403   enum tree_code code;
1404   double_int res, p0, p1;
1405   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1406
1407   STRIP_NOPS (top);
1408   STRIP_NOPS (bot);
1409
1410   if (operand_equal_p (top, bot, 0))
1411     {
1412       *mul = double_int_one;
1413       return true;
1414     }
1415
1416   code = TREE_CODE (top);
1417   switch (code)
1418     {
1419     case MULT_EXPR:
1420       mby = TREE_OPERAND (top, 1);
1421       if (TREE_CODE (mby) != INTEGER_CST)
1422         return false;
1423
1424       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1425         return false;
1426
1427       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1428                               precision);
1429       return true;
1430
1431     case PLUS_EXPR:
1432     case MINUS_EXPR:
1433       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1434           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1435         return false;
1436
1437       if (code == MINUS_EXPR)
1438         p1 = double_int_neg (p1);
1439       *mul = double_int_sext (double_int_add (p0, p1), precision);
1440       return true;
1441
1442     case INTEGER_CST:
1443       if (TREE_CODE (bot) != INTEGER_CST)
1444         return false;
1445
1446       p0 = double_int_sext (tree_to_double_int (top), precision);
1447       p1 = double_int_sext (tree_to_double_int (bot), precision);
1448       if (double_int_zero_p (p1))
1449         return false;
1450       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1451                               precision);
1452       return double_int_zero_p (res);
1453
1454     default:
1455       return false;
1456     }
1457 }
1458
1459 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1460
1461 static bool
1462 may_be_unaligned_p (tree ref, tree step)
1463 {
1464   tree base;
1465   tree base_type;
1466   HOST_WIDE_INT bitsize;
1467   HOST_WIDE_INT bitpos;
1468   tree toffset;
1469   enum machine_mode mode;
1470   int unsignedp, volatilep;
1471   unsigned base_align;
1472
1473   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1474      thus they are not misaligned.  */
1475   if (TREE_CODE (ref) == TARGET_MEM_REF)
1476     return false;
1477
1478   /* The test below is basically copy of what expr.c:normal_inner_ref
1479      does to check whether the object must be loaded by parts when
1480      STRICT_ALIGNMENT is true.  */
1481   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1482                               &unsignedp, &volatilep, true);
1483   base_type = TREE_TYPE (base);
1484   base_align = TYPE_ALIGN (base_type);
1485
1486   if (mode != BLKmode)
1487     {
1488       double_int mul;
1489       tree al = build_int_cst (TREE_TYPE (step),
1490                                GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1491
1492       if (base_align < GET_MODE_ALIGNMENT (mode)
1493           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1494           || bitpos % BITS_PER_UNIT != 0)
1495         return true;
1496
1497       if (!constant_multiple_of (step, al, &mul))
1498         return true;
1499     }
1500
1501   return false;
1502 }
1503
1504 /* Return true if EXPR may be non-addressable.   */
1505
1506 static bool
1507 may_be_nonaddressable_p (tree expr)
1508 {
1509   switch (TREE_CODE (expr))
1510     {
1511     case TARGET_MEM_REF:
1512       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1513          target, thus they are always addressable.  */
1514       return false;
1515
1516     case COMPONENT_REF:
1517       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1518              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1519
1520     case VIEW_CONVERT_EXPR:
1521       /* This kind of view-conversions may wrap non-addressable objects
1522          and make them look addressable.  After some processing the
1523          non-addressability may be uncovered again, causing ADDR_EXPRs
1524          of inappropriate objects to be built.  */
1525       if (AGGREGATE_TYPE_P (TREE_TYPE (expr))
1526           && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0))))
1527         return true;
1528
1529       /* ... fall through ... */
1530
1531     case ARRAY_REF:
1532     case ARRAY_RANGE_REF:
1533       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1534
1535     case CONVERT_EXPR:
1536     case NON_LVALUE_EXPR:
1537     case NOP_EXPR:
1538       return true;
1539
1540     default:
1541       break;
1542     }
1543
1544   return false;
1545 }
1546
1547 /* Finds addresses in *OP_P inside STMT.  */
1548
1549 static void
1550 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1551 {
1552   tree base = *op_p, step = build_int_cst (sizetype, 0);
1553   struct iv *civ;
1554   struct ifs_ivopts_data ifs_ivopts_data;
1555
1556   /* Do not play with volatile memory references.  A bit too conservative,
1557      perhaps, but safe.  */
1558   if (stmt_ann (stmt)->has_volatile_ops)
1559     goto fail;
1560
1561   /* Ignore bitfields for now.  Not really something terribly complicated
1562      to handle.  TODO.  */
1563   if (TREE_CODE (base) == BIT_FIELD_REF)
1564     goto fail;
1565
1566   base = unshare_expr (base);
1567
1568   if (TREE_CODE (base) == TARGET_MEM_REF)
1569     {
1570       tree type = build_pointer_type (TREE_TYPE (base));
1571       tree astep;
1572
1573       if (TMR_BASE (base)
1574           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1575         {
1576           civ = get_iv (data, TMR_BASE (base));
1577           if (!civ)
1578             goto fail;
1579
1580           TMR_BASE (base) = civ->base;
1581           step = civ->step;
1582         }
1583       if (TMR_INDEX (base)
1584           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1585         {
1586           civ = get_iv (data, TMR_INDEX (base));
1587           if (!civ)
1588             goto fail;
1589
1590           TMR_INDEX (base) = civ->base;
1591           astep = civ->step;
1592
1593           if (astep)
1594             {
1595               if (TMR_STEP (base))
1596                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1597
1598               step = fold_build2 (PLUS_EXPR, type, step, astep);
1599             }
1600         }
1601
1602       if (integer_zerop (step))
1603         goto fail;
1604       base = tree_mem_ref_addr (type, base);
1605     }
1606   else
1607     {
1608       ifs_ivopts_data.ivopts_data = data;
1609       ifs_ivopts_data.stmt = stmt;
1610       ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1611       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1612           || integer_zerop (ifs_ivopts_data.step))
1613         goto fail;
1614       step = ifs_ivopts_data.step;
1615
1616       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1617       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1618
1619       /* Check that the base expression is addressable.  This needs
1620          to be done after substituting bases of IVs into it.  */
1621       if (may_be_nonaddressable_p (base))
1622         goto fail;
1623
1624       /* Moreover, on strict alignment platforms, check that it is
1625          sufficiently aligned.  */
1626       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1627         goto fail;
1628
1629       base = build_fold_addr_expr (base);
1630
1631       /* Substituting bases of IVs into the base expression might
1632          have caused folding opportunities.  */
1633       if (TREE_CODE (base) == ADDR_EXPR)
1634         {
1635           tree *ref = &TREE_OPERAND (base, 0);
1636           while (handled_component_p (*ref))
1637             ref = &TREE_OPERAND (*ref, 0);
1638           if (TREE_CODE (*ref) == INDIRECT_REF)
1639             *ref = fold_indirect_ref (*ref);
1640         }
1641     }
1642
1643   civ = alloc_iv (base, step);
1644   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1645   return;
1646
1647 fail:
1648   for_each_index (op_p, idx_record_use, data);
1649 }
1650
1651 /* Finds and records invariants used in STMT.  */
1652
1653 static void
1654 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1655 {
1656   ssa_op_iter iter;
1657   use_operand_p use_p;
1658   tree op;
1659
1660   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1661     {
1662       op = USE_FROM_PTR (use_p);
1663       record_invariant (data, op, false);
1664     }
1665 }
1666
1667 /* Finds interesting uses of induction variables in the statement STMT.  */
1668
1669 static void
1670 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1671 {
1672   struct iv *iv;
1673   tree op, lhs, rhs;
1674   ssa_op_iter iter;
1675   use_operand_p use_p;
1676
1677   find_invariants_stmt (data, stmt);
1678
1679   if (TREE_CODE (stmt) == COND_EXPR)
1680     {
1681       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1682       return;
1683     }
1684
1685   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1686     {
1687       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1688       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1689
1690       if (TREE_CODE (lhs) == SSA_NAME)
1691         {
1692           /* If the statement defines an induction variable, the uses are not
1693              interesting by themselves.  */
1694
1695           iv = get_iv (data, lhs);
1696
1697           if (iv && !integer_zerop (iv->step))
1698             return;
1699         }
1700
1701       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1702         {
1703         case tcc_comparison:
1704           find_interesting_uses_cond (data, stmt,
1705                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1706           return;
1707
1708         case tcc_reference:
1709           find_interesting_uses_address (data, stmt,
1710                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1711           if (REFERENCE_CLASS_P (lhs))
1712             find_interesting_uses_address (data, stmt,
1713                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1714           return;
1715
1716         default: ;
1717         }
1718
1719       if (REFERENCE_CLASS_P (lhs)
1720           && is_gimple_val (rhs))
1721         {
1722           find_interesting_uses_address (data, stmt,
1723                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1724           find_interesting_uses_op (data, rhs);
1725           return;
1726         }
1727
1728       /* TODO -- we should also handle address uses of type
1729
1730          memory = call (whatever);
1731
1732          and
1733
1734          call (memory).  */
1735     }
1736
1737   if (TREE_CODE (stmt) == PHI_NODE
1738       && bb_for_stmt (stmt) == data->current_loop->header)
1739     {
1740       lhs = PHI_RESULT (stmt);
1741       iv = get_iv (data, lhs);
1742
1743       if (iv && !integer_zerop (iv->step))
1744         return;
1745     }
1746
1747   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1748     {
1749       op = USE_FROM_PTR (use_p);
1750
1751       if (TREE_CODE (op) != SSA_NAME)
1752         continue;
1753
1754       iv = get_iv (data, op);
1755       if (!iv)
1756         continue;
1757
1758       find_interesting_uses_op (data, op);
1759     }
1760 }
1761
1762 /* Finds interesting uses of induction variables outside of loops
1763    on loop exit edge EXIT.  */
1764
1765 static void
1766 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1767 {
1768   tree phi, def;
1769
1770   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1771     {
1772       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1773       if (is_gimple_reg (def))
1774         find_interesting_uses_op (data, def);
1775     }
1776 }
1777
1778 /* Finds uses of the induction variables that are interesting.  */
1779
1780 static void
1781 find_interesting_uses (struct ivopts_data *data)
1782 {
1783   basic_block bb;
1784   block_stmt_iterator bsi;
1785   tree phi;
1786   basic_block *body = get_loop_body (data->current_loop);
1787   unsigned i;
1788   struct version_info *info;
1789   edge e;
1790
1791   if (dump_file && (dump_flags & TDF_DETAILS))
1792     fprintf (dump_file, "Uses:\n\n");
1793
1794   for (i = 0; i < data->current_loop->num_nodes; i++)
1795     {
1796       edge_iterator ei;
1797       bb = body[i];
1798
1799       FOR_EACH_EDGE (e, ei, bb->succs)
1800         if (e->dest != EXIT_BLOCK_PTR
1801             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1802           find_interesting_uses_outside (data, e);
1803
1804       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1805         find_interesting_uses_stmt (data, phi);
1806       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1807         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1808     }
1809
1810   if (dump_file && (dump_flags & TDF_DETAILS))
1811     {
1812       bitmap_iterator bi;
1813
1814       fprintf (dump_file, "\n");
1815
1816       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1817         {
1818           info = ver_info (data, i);
1819           if (info->inv_id)
1820             {
1821               fprintf (dump_file, "  ");
1822               print_generic_expr (dump_file, info->name, TDF_SLIM);
1823               fprintf (dump_file, " is invariant (%d)%s\n",
1824                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1825             }
1826         }
1827
1828       fprintf (dump_file, "\n");
1829     }
1830
1831   free (body);
1832 }
1833
1834 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1835    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1836    we are at the top-level of the processed address.  */
1837
1838 static tree
1839 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1840                 unsigned HOST_WIDE_INT *offset)
1841 {
1842   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1843   enum tree_code code;
1844   tree type, orig_type = TREE_TYPE (expr);
1845   unsigned HOST_WIDE_INT off0, off1, st;
1846   tree orig_expr = expr;
1847
1848   STRIP_NOPS (expr);
1849
1850   type = TREE_TYPE (expr);
1851   code = TREE_CODE (expr);
1852   *offset = 0;
1853
1854   switch (code)
1855     {
1856     case INTEGER_CST:
1857       if (!cst_and_fits_in_hwi (expr)
1858           || integer_zerop (expr))
1859         return orig_expr;
1860
1861       *offset = int_cst_value (expr);
1862       return build_int_cst (orig_type, 0);
1863
1864     case POINTER_PLUS_EXPR:
1865     case PLUS_EXPR:
1866     case MINUS_EXPR:
1867       op0 = TREE_OPERAND (expr, 0);
1868       op1 = TREE_OPERAND (expr, 1);
1869
1870       op0 = strip_offset_1 (op0, false, false, &off0);
1871       op1 = strip_offset_1 (op1, false, false, &off1);
1872
1873       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1874       if (op0 == TREE_OPERAND (expr, 0)
1875           && op1 == TREE_OPERAND (expr, 1))
1876         return orig_expr;
1877
1878       if (integer_zerop (op1))
1879         expr = op0;
1880       else if (integer_zerop (op0))
1881         {
1882           if (code == MINUS_EXPR)
1883             expr = fold_build1 (NEGATE_EXPR, type, op1);
1884           else
1885             expr = op1;
1886         }
1887       else
1888         expr = fold_build2 (code, type, op0, op1);
1889
1890       return fold_convert (orig_type, expr);
1891
1892     case ARRAY_REF:
1893       if (!inside_addr)
1894         return orig_expr;
1895
1896       step = array_ref_element_size (expr);
1897       if (!cst_and_fits_in_hwi (step))
1898         break;
1899
1900       st = int_cst_value (step);
1901       op1 = TREE_OPERAND (expr, 1);
1902       op1 = strip_offset_1 (op1, false, false, &off1);
1903       *offset = off1 * st;
1904
1905       if (top_compref
1906           && integer_zerop (op1))
1907         {
1908           /* Strip the component reference completely.  */
1909           op0 = TREE_OPERAND (expr, 0);
1910           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1911           *offset += off0;
1912           return op0;
1913         }
1914       break;
1915
1916     case COMPONENT_REF:
1917       if (!inside_addr)
1918         return orig_expr;
1919
1920       tmp = component_ref_field_offset (expr);
1921       if (top_compref
1922           && cst_and_fits_in_hwi (tmp))
1923         {
1924           /* Strip the component reference completely.  */
1925           op0 = TREE_OPERAND (expr, 0);
1926           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1927           *offset = off0 + int_cst_value (tmp);
1928           return op0;
1929         }
1930       break;
1931
1932     case ADDR_EXPR:
1933       op0 = TREE_OPERAND (expr, 0);
1934       op0 = strip_offset_1 (op0, true, true, &off0);
1935       *offset += off0;
1936
1937       if (op0 == TREE_OPERAND (expr, 0))
1938         return orig_expr;
1939
1940       expr = build_fold_addr_expr (op0);
1941       return fold_convert (orig_type, expr);
1942
1943     case INDIRECT_REF:
1944       inside_addr = false;
1945       break;
1946
1947     default:
1948       return orig_expr;
1949     }
1950
1951   /* Default handling of expressions for that we want to recurse into
1952      the first operand.  */
1953   op0 = TREE_OPERAND (expr, 0);
1954   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1955   *offset += off0;
1956
1957   if (op0 == TREE_OPERAND (expr, 0)
1958       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1959     return orig_expr;
1960
1961   expr = copy_node (expr);
1962   TREE_OPERAND (expr, 0) = op0;
1963   if (op1)
1964     TREE_OPERAND (expr, 1) = op1;
1965
1966   /* Inside address, we might strip the top level component references,
1967      thus changing type of the expression.  Handling of ADDR_EXPR
1968      will fix that.  */
1969   expr = fold_convert (orig_type, expr);
1970
1971   return expr;
1972 }
1973
1974 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1975
1976 static tree
1977 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1978 {
1979   return strip_offset_1 (expr, false, false, offset);
1980 }
1981
1982 /* Returns variant of TYPE that can be used as base for different uses.
1983    We return unsigned type with the same precision, which avoids problems
1984    with overflows.  */
1985
1986 static tree
1987 generic_type_for (tree type)
1988 {
1989   if (POINTER_TYPE_P (type))
1990     return unsigned_type_for (type);
1991
1992   if (TYPE_UNSIGNED (type))
1993     return type;
1994
1995   return unsigned_type_for (type);
1996 }
1997
1998 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1999    the bitmap to that we should store it.  */
2000
2001 static struct ivopts_data *fd_ivopts_data;
2002 static tree
2003 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2004 {
2005   bitmap *depends_on = (bitmap *) data;
2006   struct version_info *info;
2007
2008   if (TREE_CODE (*expr_p) != SSA_NAME)
2009     return NULL_TREE;
2010   info = name_info (fd_ivopts_data, *expr_p);
2011
2012   if (!info->inv_id || info->has_nonlin_use)
2013     return NULL_TREE;
2014
2015   if (!*depends_on)
2016     *depends_on = BITMAP_ALLOC (NULL);
2017   bitmap_set_bit (*depends_on, info->inv_id);
2018
2019   return NULL_TREE;
2020 }
2021
2022 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2023    position to POS.  If USE is not NULL, the candidate is set as related to
2024    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2025    replacement of the final value of the iv by a direct computation.  */
2026
2027 static struct iv_cand *
2028 add_candidate_1 (struct ivopts_data *data,
2029                  tree base, tree step, bool important, enum iv_position pos,
2030                  struct iv_use *use, tree incremented_at)
2031 {
2032   unsigned i;
2033   struct iv_cand *cand = NULL;
2034   tree type, orig_type;
2035
2036   if (base)
2037     {
2038       orig_type = TREE_TYPE (base);
2039       type = generic_type_for (orig_type);
2040       if (type != orig_type)
2041         {
2042           base = fold_convert (type, base);
2043           step = fold_convert (type, step);
2044         }
2045     }
2046
2047   for (i = 0; i < n_iv_cands (data); i++)
2048     {
2049       cand = iv_cand (data, i);
2050
2051       if (cand->pos != pos)
2052         continue;
2053
2054       if (cand->incremented_at != incremented_at)
2055         continue;
2056
2057       if (!cand->iv)
2058         {
2059           if (!base && !step)
2060             break;
2061
2062           continue;
2063         }
2064
2065       if (!base && !step)
2066         continue;
2067
2068       if (operand_equal_p (base, cand->iv->base, 0)
2069           && operand_equal_p (step, cand->iv->step, 0))
2070         break;
2071     }
2072
2073   if (i == n_iv_cands (data))
2074     {
2075       cand = XCNEW (struct iv_cand);
2076       cand->id = i;
2077
2078       if (!base && !step)
2079         cand->iv = NULL;
2080       else
2081         cand->iv = alloc_iv (base, step);
2082
2083       cand->pos = pos;
2084       if (pos != IP_ORIGINAL && cand->iv)
2085         {
2086           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2087           cand->var_after = cand->var_before;
2088         }
2089       cand->important = important;
2090       cand->incremented_at = incremented_at;
2091       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2092
2093       if (step
2094           && TREE_CODE (step) != INTEGER_CST)
2095         {
2096           fd_ivopts_data = data;
2097           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2098         }
2099
2100       if (dump_file && (dump_flags & TDF_DETAILS))
2101         dump_cand (dump_file, cand);
2102     }
2103
2104   if (important && !cand->important)
2105     {
2106       cand->important = true;
2107       if (dump_file && (dump_flags & TDF_DETAILS))
2108         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2109     }
2110
2111   if (use)
2112     {
2113       bitmap_set_bit (use->related_cands, i);
2114       if (dump_file && (dump_flags & TDF_DETAILS))
2115         fprintf (dump_file, "Candidate %d is related to use %d\n",
2116                  cand->id, use->id);
2117     }
2118
2119   return cand;
2120 }
2121
2122 /* Returns true if incrementing the induction variable at the end of the LOOP
2123    is allowed.
2124
2125    The purpose is to avoid splitting latch edge with a biv increment, thus
2126    creating a jump, possibly confusing other optimization passes and leaving
2127    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2128    is not available (so we do not have a better alternative), or if the latch
2129    edge is already nonempty.  */
2130
2131 static bool
2132 allow_ip_end_pos_p (struct loop *loop)
2133 {
2134   if (!ip_normal_pos (loop))
2135     return true;
2136
2137   if (!empty_block_p (ip_end_pos (loop)))
2138     return true;
2139
2140   return false;
2141 }
2142
2143 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2144    position to POS.  If USE is not NULL, the candidate is set as related to
2145    it.  The candidate computation is scheduled on all available positions.  */
2146
2147 static void
2148 add_candidate (struct ivopts_data *data,
2149                tree base, tree step, bool important, struct iv_use *use)
2150 {
2151   if (ip_normal_pos (data->current_loop))
2152     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2153   if (ip_end_pos (data->current_loop)
2154       && allow_ip_end_pos_p (data->current_loop))
2155     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2156 }
2157
2158 /* Add a standard "0 + 1 * iteration" iv candidate for a
2159    type with SIZE bits.  */
2160
2161 static void
2162 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2163                                      unsigned int size)
2164 {
2165   tree type = lang_hooks.types.type_for_size (size, true);
2166   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2167                  true, NULL);
2168 }
2169
2170 /* Adds standard iv candidates.  */
2171
2172 static void
2173 add_standard_iv_candidates (struct ivopts_data *data)
2174 {
2175   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2176
2177   /* The same for a double-integer type if it is still fast enough.  */
2178   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2179     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2180 }
2181
2182
2183 /* Adds candidates bases on the old induction variable IV.  */
2184
2185 static void
2186 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2187 {
2188   tree phi, def;
2189   struct iv_cand *cand;
2190
2191   add_candidate (data, iv->base, iv->step, true, NULL);
2192
2193   /* The same, but with initial value zero.  */
2194   add_candidate (data,
2195                  build_int_cst (TREE_TYPE (iv->base), 0),
2196                  iv->step, true, NULL);
2197
2198   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2199   if (TREE_CODE (phi) == PHI_NODE)
2200     {
2201       /* Additionally record the possibility of leaving the original iv
2202          untouched.  */
2203       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2204       cand = add_candidate_1 (data,
2205                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2206                               SSA_NAME_DEF_STMT (def));
2207       cand->var_before = iv->ssa_name;
2208       cand->var_after = def;
2209     }
2210 }
2211
2212 /* Adds candidates based on the old induction variables.  */
2213
2214 static void
2215 add_old_ivs_candidates (struct ivopts_data *data)
2216 {
2217   unsigned i;
2218   struct iv *iv;
2219   bitmap_iterator bi;
2220
2221   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2222     {
2223       iv = ver_info (data, i)->iv;
2224       if (iv && iv->biv_p && !integer_zerop (iv->step))
2225         add_old_iv_candidates (data, iv);
2226     }
2227 }
2228
2229 /* Adds candidates based on the value of the induction variable IV and USE.  */
2230
2231 static void
2232 add_iv_value_candidates (struct ivopts_data *data,
2233                          struct iv *iv, struct iv_use *use)
2234 {
2235   unsigned HOST_WIDE_INT offset;
2236   tree base;
2237
2238   add_candidate (data, iv->base, iv->step, false, use);
2239
2240   /* The same, but with initial value zero.  Make such variable important,
2241      since it is generic enough so that possibly many uses may be based
2242      on it.  */
2243   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2244                  iv->step, true, use);
2245
2246   /* Third, try removing the constant offset.  */
2247   base = strip_offset (iv->base, &offset);
2248   if (offset)
2249     add_candidate (data, base, iv->step, false, use);
2250 }
2251
2252 /* Adds candidates based on the uses.  */
2253
2254 static void
2255 add_derived_ivs_candidates (struct ivopts_data *data)
2256 {
2257   unsigned i;
2258
2259   for (i = 0; i < n_iv_uses (data); i++)
2260     {
2261       struct iv_use *use = iv_use (data, i);
2262
2263       if (!use)
2264         continue;
2265
2266       switch (use->type)
2267         {
2268         case USE_NONLINEAR_EXPR:
2269         case USE_COMPARE:
2270         case USE_ADDRESS:
2271           /* Just add the ivs based on the value of the iv used here.  */
2272           add_iv_value_candidates (data, use->iv, use);
2273           break;
2274
2275         default:
2276           gcc_unreachable ();
2277         }
2278     }
2279 }
2280
2281 /* Record important candidates and add them to related_cands bitmaps
2282    if needed.  */
2283
2284 static void
2285 record_important_candidates (struct ivopts_data *data)
2286 {
2287   unsigned i;
2288   struct iv_use *use;
2289
2290   for (i = 0; i < n_iv_cands (data); i++)
2291     {
2292       struct iv_cand *cand = iv_cand (data, i);
2293
2294       if (cand->important)
2295         bitmap_set_bit (data->important_candidates, i);
2296     }
2297
2298   data->consider_all_candidates = (n_iv_cands (data)
2299                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2300
2301   if (data->consider_all_candidates)
2302     {
2303       /* We will not need "related_cands" bitmaps in this case,
2304          so release them to decrease peak memory consumption.  */
2305       for (i = 0; i < n_iv_uses (data); i++)
2306         {
2307           use = iv_use (data, i);
2308           BITMAP_FREE (use->related_cands);
2309         }
2310     }
2311   else
2312     {
2313       /* Add important candidates to the related_cands bitmaps.  */
2314       for (i = 0; i < n_iv_uses (data); i++)
2315         bitmap_ior_into (iv_use (data, i)->related_cands,
2316                          data->important_candidates);
2317     }
2318 }
2319
2320 /* Finds the candidates for the induction variables.  */
2321
2322 static void
2323 find_iv_candidates (struct ivopts_data *data)
2324 {
2325   /* Add commonly used ivs.  */
2326   add_standard_iv_candidates (data);
2327
2328   /* Add old induction variables.  */
2329   add_old_ivs_candidates (data);
2330
2331   /* Add induction variables derived from uses.  */
2332   add_derived_ivs_candidates (data);
2333
2334   /* Record the important candidates.  */
2335   record_important_candidates (data);
2336 }
2337
2338 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2339    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2340    we allocate a simple list to every use.  */
2341
2342 static void
2343 alloc_use_cost_map (struct ivopts_data *data)
2344 {
2345   unsigned i, size, s, j;
2346
2347   for (i = 0; i < n_iv_uses (data); i++)
2348     {
2349       struct iv_use *use = iv_use (data, i);
2350       bitmap_iterator bi;
2351
2352       if (data->consider_all_candidates)
2353         size = n_iv_cands (data);
2354       else
2355         {
2356           s = 0;
2357           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2358             {
2359               s++;
2360             }
2361
2362           /* Round up to the power of two, so that moduling by it is fast.  */
2363           for (size = 1; size < s; size <<= 1)
2364             continue;
2365         }
2366
2367       use->n_map_members = size;
2368       use->cost_map = XCNEWVEC (struct cost_pair, size);
2369     }
2370 }
2371
2372 /* Returns description of computation cost of expression whose runtime
2373    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2374
2375 static comp_cost
2376 new_cost (unsigned runtime, unsigned complexity)
2377 {
2378   comp_cost cost;
2379
2380   cost.cost = runtime;
2381   cost.complexity = complexity;
2382
2383   return cost;
2384 }
2385
2386 /* Adds costs COST1 and COST2.  */
2387
2388 static comp_cost
2389 add_costs (comp_cost cost1, comp_cost cost2)
2390 {
2391   cost1.cost += cost2.cost;
2392   cost1.complexity += cost2.complexity;
2393
2394   return cost1;
2395 }
2396 /* Subtracts costs COST1 and COST2.  */
2397
2398 static comp_cost
2399 sub_costs (comp_cost cost1, comp_cost cost2)
2400 {
2401   cost1.cost -= cost2.cost;
2402   cost1.complexity -= cost2.complexity;
2403
2404   return cost1;
2405 }
2406
2407 /* Returns a negative number if COST1 < COST2, a positive number if
2408    COST1 > COST2, and 0 if COST1 = COST2.  */
2409
2410 static int
2411 compare_costs (comp_cost cost1, comp_cost cost2)
2412 {
2413   if (cost1.cost == cost2.cost)
2414     return cost1.complexity - cost2.complexity;
2415
2416   return cost1.cost - cost2.cost;
2417 }
2418
2419 /* Returns true if COST is infinite.  */
2420
2421 static bool
2422 infinite_cost_p (comp_cost cost)
2423 {
2424   return cost.cost == INFTY;
2425 }
2426
2427 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2428    on invariants DEPENDS_ON and that the value used in expressing it
2429    is VALUE.*/
2430
2431 static void
2432 set_use_iv_cost (struct ivopts_data *data,
2433                  struct iv_use *use, struct iv_cand *cand,
2434                  comp_cost cost, bitmap depends_on, tree value)
2435 {
2436   unsigned i, s;
2437
2438   if (infinite_cost_p (cost))
2439     {
2440       BITMAP_FREE (depends_on);
2441       return;
2442     }
2443
2444   if (data->consider_all_candidates)
2445     {
2446       use->cost_map[cand->id].cand = cand;
2447       use->cost_map[cand->id].cost = cost;
2448       use->cost_map[cand->id].depends_on = depends_on;
2449       use->cost_map[cand->id].value = value;
2450       return;
2451     }
2452
2453   /* n_map_members is a power of two, so this computes modulo.  */
2454   s = cand->id & (use->n_map_members - 1);
2455   for (i = s; i < use->n_map_members; i++)
2456     if (!use->cost_map[i].cand)
2457       goto found;
2458   for (i = 0; i < s; i++)
2459     if (!use->cost_map[i].cand)
2460       goto found;
2461
2462   gcc_unreachable ();
2463
2464 found:
2465   use->cost_map[i].cand = cand;
2466   use->cost_map[i].cost = cost;
2467   use->cost_map[i].depends_on = depends_on;
2468   use->cost_map[i].value = value;
2469 }
2470
2471 /* Gets cost of (USE, CANDIDATE) pair.  */
2472
2473 static struct cost_pair *
2474 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2475                  struct iv_cand *cand)
2476 {
2477   unsigned i, s;
2478   struct cost_pair *ret;
2479
2480   if (!cand)
2481     return NULL;
2482
2483   if (data->consider_all_candidates)
2484     {
2485       ret = use->cost_map + cand->id;
2486       if (!ret->cand)
2487         return NULL;
2488
2489       return ret;
2490     }
2491
2492   /* n_map_members is a power of two, so this computes modulo.  */
2493   s = cand->id & (use->n_map_members - 1);
2494   for (i = s; i < use->n_map_members; i++)
2495     if (use->cost_map[i].cand == cand)
2496       return use->cost_map + i;
2497
2498   for (i = 0; i < s; i++)
2499     if (use->cost_map[i].cand == cand)
2500       return use->cost_map + i;
2501
2502   return NULL;
2503 }
2504
2505 /* Returns estimate on cost of computing SEQ.  */
2506
2507 static unsigned
2508 seq_cost (rtx seq)
2509 {
2510   unsigned cost = 0;
2511   rtx set;
2512
2513   for (; seq; seq = NEXT_INSN (seq))
2514     {
2515       set = single_set (seq);
2516       if (set)
2517         cost += rtx_cost (set, SET);
2518       else
2519         cost++;
2520     }
2521
2522   return cost;
2523 }
2524
2525 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2526 static rtx
2527 produce_memory_decl_rtl (tree obj, int *regno)
2528 {
2529   rtx x;
2530
2531   gcc_assert (obj);
2532   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2533     {
2534       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2535       x = gen_rtx_SYMBOL_REF (Pmode, name);
2536       SET_SYMBOL_REF_DECL (x, obj);
2537       x = gen_rtx_MEM (DECL_MODE (obj), x);
2538       targetm.encode_section_info (obj, x, true);
2539     }
2540   else
2541     {
2542       x = gen_raw_REG (Pmode, (*regno)++);
2543       x = gen_rtx_MEM (DECL_MODE (obj), x);
2544     }
2545
2546   return x;
2547 }
2548
2549 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2550    walk_tree.  DATA contains the actual fake register number.  */
2551
2552 static tree
2553 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2554 {
2555   tree obj = NULL_TREE;
2556   rtx x = NULL_RTX;
2557   int *regno = (int *) data;
2558
2559   switch (TREE_CODE (*expr_p))
2560     {
2561     case ADDR_EXPR:
2562       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2563            handled_component_p (*expr_p);
2564            expr_p = &TREE_OPERAND (*expr_p, 0))
2565         continue;
2566       obj = *expr_p;
2567       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2568         x = produce_memory_decl_rtl (obj, regno);
2569       break;
2570
2571     case SSA_NAME:
2572       *ws = 0;
2573       obj = SSA_NAME_VAR (*expr_p);
2574       if (!DECL_RTL_SET_P (obj))
2575         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2576       break;
2577
2578     case VAR_DECL:
2579     case PARM_DECL:
2580     case RESULT_DECL:
2581       *ws = 0;
2582       obj = *expr_p;
2583
2584       if (DECL_RTL_SET_P (obj))
2585         break;
2586
2587       if (DECL_MODE (obj) == BLKmode)
2588         x = produce_memory_decl_rtl (obj, regno);
2589       else
2590         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2591
2592       break;
2593
2594     default:
2595       break;
2596     }
2597
2598   if (x)
2599     {
2600       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2601       SET_DECL_RTL (obj, x);
2602     }
2603
2604   return NULL_TREE;
2605 }
2606
2607 /* Determines cost of the computation of EXPR.  */
2608
2609 static unsigned
2610 computation_cost (tree expr)
2611 {
2612   rtx seq, rslt;
2613   tree type = TREE_TYPE (expr);
2614   unsigned cost;
2615   /* Avoid using hard regs in ways which may be unsupported.  */
2616   int regno = LAST_VIRTUAL_REGISTER + 1;
2617
2618   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2619   start_sequence ();
2620   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2621   seq = get_insns ();
2622   end_sequence ();
2623
2624   cost = seq_cost (seq);
2625   if (MEM_P (rslt))
2626     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2627
2628   return cost;
2629 }
2630
2631 /* Returns variable containing the value of candidate CAND at statement AT.  */
2632
2633 static tree
2634 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2635 {
2636   if (stmt_after_increment (loop, cand, stmt))
2637     return cand->var_after;
2638   else
2639     return cand->var_before;
2640 }
2641
2642 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2643    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2644
2645 int
2646 tree_int_cst_sign_bit (const_tree t)
2647 {
2648   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2649   unsigned HOST_WIDE_INT w;
2650
2651   if (bitno < HOST_BITS_PER_WIDE_INT)
2652     w = TREE_INT_CST_LOW (t);
2653   else
2654     {
2655       w = TREE_INT_CST_HIGH (t);
2656       bitno -= HOST_BITS_PER_WIDE_INT;
2657     }
2658
2659   return (w >> bitno) & 1;
2660 }
2661
2662 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2663    same precision that is at least as wide as the precision of TYPE, stores
2664    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2665    type of A and B.  */
2666
2667 static tree
2668 determine_common_wider_type (tree *a, tree *b)
2669 {
2670   tree wider_type = NULL;
2671   tree suba, subb;
2672   tree atype = TREE_TYPE (*a);
2673
2674   if ((TREE_CODE (*a) == NOP_EXPR
2675        || TREE_CODE (*a) == CONVERT_EXPR))
2676     {
2677       suba = TREE_OPERAND (*a, 0);
2678       wider_type = TREE_TYPE (suba);
2679       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2680         return atype;
2681     }
2682   else
2683     return atype;
2684
2685   if ((TREE_CODE (*b) == NOP_EXPR
2686        || TREE_CODE (*b) == CONVERT_EXPR))
2687     {
2688       subb = TREE_OPERAND (*b, 0);
2689       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2690         return atype;
2691     }
2692   else
2693     return atype;
2694
2695   *a = suba;
2696   *b = subb;
2697   return wider_type;
2698 }
2699
2700 /* Determines the expression by that USE is expressed from induction variable
2701    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2702    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2703
2704 static bool
2705 get_computation_aff (struct loop *loop,
2706                      struct iv_use *use, struct iv_cand *cand, tree at,
2707                      struct affine_tree_combination *aff)
2708 {
2709   tree ubase = use->iv->base;
2710   tree ustep = use->iv->step;
2711   tree cbase = cand->iv->base;
2712   tree cstep = cand->iv->step, cstep_common;
2713   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2714   tree common_type, var;
2715   tree uutype;
2716   aff_tree cbase_aff, var_aff;
2717   double_int rat;
2718
2719   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2720     {
2721       /* We do not have a precision to express the values of use.  */
2722       return false;
2723     }
2724
2725   var = var_at_stmt (loop, cand, at);
2726   uutype = unsigned_type_for (utype);
2727
2728   /* If the conversion is not noop, perform it.  */
2729   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2730     {
2731       cstep = fold_convert (uutype, cstep);
2732       cbase = fold_convert (uutype, cbase);
2733       var = fold_convert (uutype, var);
2734     }
2735
2736   if (!constant_multiple_of (ustep, cstep, &rat))
2737     return false;
2738
2739   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2740      type, we achieve better folding by computing their difference in this
2741      wider type, and cast the result to UUTYPE.  We do not need to worry about
2742      overflows, as all the arithmetics will in the end be performed in UUTYPE
2743      anyway.  */
2744   common_type = determine_common_wider_type (&ubase, &cbase);
2745
2746   /* use = ubase - ratio * cbase + ratio * var.  */
2747   tree_to_aff_combination (ubase, common_type, aff);
2748   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2749   tree_to_aff_combination (var, uutype, &var_aff);
2750
2751   /* We need to shift the value if we are after the increment.  */
2752   if (stmt_after_increment (loop, cand, at))
2753     {
2754       aff_tree cstep_aff;
2755
2756       if (common_type != uutype)
2757         cstep_common = fold_convert (common_type, cstep);
2758       else
2759         cstep_common = cstep;
2760
2761       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2762       aff_combination_add (&cbase_aff, &cstep_aff);
2763     }
2764
2765   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2766   aff_combination_add (aff, &cbase_aff);
2767   if (common_type != uutype)
2768     aff_combination_convert (aff, uutype);
2769
2770   aff_combination_scale (&var_aff, rat);
2771   aff_combination_add (aff, &var_aff);
2772
2773   return true;
2774 }
2775
2776 /* Determines the expression by that USE is expressed from induction variable
2777    CAND at statement AT in LOOP.  The computation is unshared.  */
2778
2779 static tree
2780 get_computation_at (struct loop *loop,
2781                     struct iv_use *use, struct iv_cand *cand, tree at)
2782 {
2783   aff_tree aff;
2784   tree type = TREE_TYPE (use->iv->base);
2785
2786   if (!get_computation_aff (loop, use, cand, at, &aff))
2787     return NULL_TREE;
2788   unshare_aff_combination (&aff);
2789   return fold_convert (type, aff_combination_to_tree (&aff));
2790 }
2791
2792 /* Determines the expression by that USE is expressed from induction variable
2793    CAND in LOOP.  The computation is unshared.  */
2794
2795 static tree
2796 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2797 {
2798   return get_computation_at (loop, use, cand, use->stmt);
2799 }
2800
2801 /* Returns cost of addition in MODE.  */
2802
2803 static unsigned
2804 add_cost (enum machine_mode mode)
2805 {
2806   static unsigned costs[NUM_MACHINE_MODES];
2807   rtx seq;
2808   unsigned cost;
2809
2810   if (costs[mode])
2811     return costs[mode];
2812
2813   start_sequence ();
2814   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2815                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2816                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2817                  NULL_RTX);
2818   seq = get_insns ();
2819   end_sequence ();
2820
2821   cost = seq_cost (seq);
2822   if (!cost)
2823     cost = 1;
2824
2825   costs[mode] = cost;
2826
2827   if (dump_file && (dump_flags & TDF_DETAILS))
2828     fprintf (dump_file, "Addition in %s costs %d\n",
2829              GET_MODE_NAME (mode), cost);
2830   return cost;
2831 }
2832
2833 /* Entry in a hashtable of already known costs for multiplication.  */
2834 struct mbc_entry
2835 {
2836   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2837   enum machine_mode mode;       /* In mode.  */
2838   unsigned cost;                /* The cost.  */
2839 };
2840
2841 /* Counts hash value for the ENTRY.  */
2842
2843 static hashval_t
2844 mbc_entry_hash (const void *entry)
2845 {
2846   const struct mbc_entry *e = (const struct mbc_entry *) entry;
2847
2848   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2849 }
2850
2851 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2852
2853 static int
2854 mbc_entry_eq (const void *entry1, const void *entry2)
2855 {
2856   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2857   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2858
2859   return (e1->mode == e2->mode
2860           && e1->cst == e2->cst);
2861 }
2862
2863 /* Returns cost of multiplication by constant CST in MODE.  */
2864
2865 unsigned
2866 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2867 {
2868   static htab_t costs;
2869   struct mbc_entry **cached, act;
2870   rtx seq;
2871   unsigned cost;
2872
2873   if (!costs)
2874     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2875
2876   act.mode = mode;
2877   act.cst = cst;
2878   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2879   if (*cached)
2880     return (*cached)->cost;
2881
2882   *cached = XNEW (struct mbc_entry);
2883   (*cached)->mode = mode;
2884   (*cached)->cst = cst;
2885
2886   start_sequence ();
2887   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2888                gen_int_mode (cst, mode), NULL_RTX, 0);
2889   seq = get_insns ();
2890   end_sequence ();
2891
2892   cost = seq_cost (seq);
2893
2894   if (dump_file && (dump_flags & TDF_DETAILS))
2895     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2896              (int) cst, GET_MODE_NAME (mode), cost);
2897
2898   (*cached)->cost = cost;
2899
2900   return cost;
2901 }
2902
2903 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2904    validity for a memory reference accessing memory of mode MODE.  */
2905
2906 bool
2907 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2908 {
2909 #define MAX_RATIO 128
2910   static sbitmap valid_mult[MAX_MACHINE_MODE];
2911
2912   if (!valid_mult[mode])
2913     {
2914       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2915       rtx addr;
2916       HOST_WIDE_INT i;
2917
2918       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2919       sbitmap_zero (valid_mult[mode]);
2920       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2921       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2922         {
2923           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2924           if (memory_address_p (mode, addr))
2925             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2926         }
2927
2928       if (dump_file && (dump_flags & TDF_DETAILS))
2929         {
2930           fprintf (dump_file, "  allowed multipliers:");
2931           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2932             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2933               fprintf (dump_file, " %d", (int) i);
2934           fprintf (dump_file, "\n");
2935           fprintf (dump_file, "\n");
2936         }
2937     }
2938
2939   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2940     return false;
2941
2942   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2943 }
2944
2945 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2946    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2947    variable is omitted.  Compute the cost for a memory reference that accesses
2948    a memory location of mode MEM_MODE.
2949
2950    TODO -- there must be some better way.  This all is quite crude.  */
2951
2952 static comp_cost
2953 get_address_cost (bool symbol_present, bool var_present,
2954                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2955                   enum machine_mode mem_mode)
2956 {
2957   static bool initialized[MAX_MACHINE_MODE];
2958   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2959   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2960   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2961   unsigned cost, acost, complexity;
2962   bool offset_p, ratio_p;
2963   HOST_WIDE_INT s_offset;
2964   unsigned HOST_WIDE_INT mask;
2965   unsigned bits;
2966
2967   if (!initialized[mem_mode])
2968     {
2969       HOST_WIDE_INT i;
2970       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2971       int old_cse_not_expected;
2972       unsigned sym_p, var_p, off_p, rat_p, add_c;
2973       rtx seq, addr, base;
2974       rtx reg0, reg1;
2975
2976       initialized[mem_mode] = true;
2977
2978       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2979
2980       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2981       for (i = start; i <= 1 << 20; i <<= 1)
2982         {
2983           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2984           if (!memory_address_p (mem_mode, addr))
2985             break;
2986         }
2987       max_offset[mem_mode] = i == start ? 0 : i >> 1;
2988       off[mem_mode] = max_offset[mem_mode];
2989
2990       for (i = start; i <= 1 << 20; i <<= 1)
2991         {
2992           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2993           if (!memory_address_p (mem_mode, addr))
2994             break;
2995         }
2996       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2997
2998       if (dump_file && (dump_flags & TDF_DETAILS))
2999         {
3000           fprintf (dump_file, "get_address_cost:\n");
3001           fprintf (dump_file, "  min offset %s %d\n",
3002                    GET_MODE_NAME (mem_mode),
3003                    (int) min_offset[mem_mode]);
3004           fprintf (dump_file, "  max offset %s %d\n",
3005                    GET_MODE_NAME (mem_mode),
3006                    (int) max_offset[mem_mode]);
3007         }
3008
3009       rat[mem_mode] = 1;
3010       for (i = 2; i <= MAX_RATIO; i++)
3011         if (multiplier_allowed_in_address_p (i, mem_mode))
3012           {
3013             rat[mem_mode] = i;
3014             break;
3015           }
3016
3017       /* Compute the cost of various addressing modes.  */
3018       acost = 0;
3019       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3020       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3021
3022       for (i = 0; i < 16; i++)
3023         {
3024           sym_p = i & 1;
3025           var_p = (i >> 1) & 1;
3026           off_p = (i >> 2) & 1;
3027           rat_p = (i >> 3) & 1;
3028
3029           addr = reg0;
3030           if (rat_p)
3031             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3032                                    gen_int_mode (rat[mem_mode], Pmode));
3033
3034           if (var_p)
3035             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3036
3037           if (sym_p)
3038             {
3039               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3040               /* ??? We can run into trouble with some backends by presenting
3041                  it with symbols which havn't been properly passed through
3042                  targetm.encode_section_info.  By setting the local bit, we
3043                  enhance the probability of things working.  */
3044               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3045
3046               if (off_p)
3047                 base = gen_rtx_fmt_e (CONST, Pmode,
3048                                       gen_rtx_fmt_ee (PLUS, Pmode,
3049                                                       base,
3050                                                       gen_int_mode (off[mem_mode],
3051                                                                     Pmode)));
3052             }
3053           else if (off_p)
3054             base = gen_int_mode (off[mem_mode], Pmode);
3055           else
3056             base = NULL_RTX;
3057
3058           if (base)
3059             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3060
3061           start_sequence ();
3062           /* To avoid splitting addressing modes, pretend that no cse will
3063              follow.  */
3064           old_cse_not_expected = cse_not_expected;
3065           cse_not_expected = true;
3066           addr = memory_address (mem_mode, addr);
3067           cse_not_expected = old_cse_not_expected;
3068           seq = get_insns ();
3069           end_sequence ();
3070
3071           acost = seq_cost (seq);
3072           acost += address_cost (addr, mem_mode);
3073
3074           if (!acost)
3075             acost = 1;
3076           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3077         }
3078
3079       /* On some targets, it is quite expensive to load symbol to a register,
3080          which makes addresses that contain symbols look much more expensive.
3081          However, the symbol will have to be loaded in any case before the
3082          loop (and quite likely we have it in register already), so it does not
3083          make much sense to penalize them too heavily.  So make some final
3084          tweaks for the SYMBOL_PRESENT modes:
3085
3086          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3087          var is cheaper, use this mode with small penalty.
3088          If VAR_PRESENT is true, try whether the mode with
3089          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3090          if this is the case, use it.  */
3091       add_c = add_cost (Pmode);
3092       for (i = 0; i < 8; i++)
3093         {
3094           var_p = i & 1;
3095           off_p = (i >> 1) & 1;
3096           rat_p = (i >> 2) & 1;
3097
3098           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3099           if (var_p)
3100             acost += add_c;
3101
3102           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3103             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3104         }
3105
3106       if (dump_file && (dump_flags & TDF_DETAILS))
3107         {
3108           fprintf (dump_file, "Address costs:\n");
3109
3110           for (i = 0; i < 16; i++)
3111             {
3112               sym_p = i & 1;
3113               var_p = (i >> 1) & 1;
3114               off_p = (i >> 2) & 1;
3115               rat_p = (i >> 3) & 1;
3116
3117               fprintf (dump_file, "  ");
3118               if (sym_p)
3119                 fprintf (dump_file, "sym + ");
3120               if (var_p)
3121                 fprintf (dump_file, "var + ");
3122               if (off_p)
3123                 fprintf (dump_file, "cst + ");
3124               if (rat_p)
3125                 fprintf (dump_file, "rat * ");
3126
3127               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3128               fprintf (dump_file, "index costs %d\n", acost);
3129             }
3130           fprintf (dump_file, "\n");
3131         }
3132     }
3133
3134   bits = GET_MODE_BITSIZE (Pmode);
3135   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3136   offset &= mask;
3137   if ((offset >> (bits - 1) & 1))
3138     offset |= ~mask;
3139   s_offset = offset;
3140
3141   cost = 0;
3142   offset_p = (s_offset != 0
3143               && min_offset[mem_mode] <= s_offset
3144               && s_offset <= max_offset[mem_mode]);
3145   ratio_p = (ratio != 1
3146              && multiplier_allowed_in_address_p (ratio, mem_mode));
3147
3148   if (ratio != 1 && !ratio_p)
3149     cost += multiply_by_cost (ratio, Pmode);
3150
3151   if (s_offset && !offset_p && !symbol_present)
3152     cost += add_cost (Pmode);
3153
3154   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3155   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3156   return new_cost (cost + acost, complexity);
3157 }
3158
3159 /* Estimates cost of forcing expression EXPR into a variable.  */
3160
3161 static comp_cost
3162 force_expr_to_var_cost (tree expr)
3163 {
3164   static bool costs_initialized = false;
3165   static unsigned integer_cost;
3166   static unsigned symbol_cost;
3167   static unsigned address_cost;
3168   tree op0, op1;
3169   comp_cost cost0, cost1, cost;
3170   enum machine_mode mode;
3171
3172   if (!costs_initialized)
3173     {
3174       tree type = build_pointer_type (integer_type_node);
3175       tree var, addr;
3176       rtx x;
3177
3178       var = create_tmp_var_raw (integer_type_node, "test_var");
3179       TREE_STATIC (var) = 1;
3180       x = produce_memory_decl_rtl (var, NULL);
3181       SET_DECL_RTL (var, x);
3182
3183       integer_cost = computation_cost (build_int_cst (integer_type_node,
3184                                                       2000));
3185
3186       addr = build1 (ADDR_EXPR, type, var);
3187       symbol_cost = computation_cost (addr) + 1;
3188
3189       address_cost
3190         = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3191                                     addr,
3192                                     build_int_cst (sizetype, 2000))) + 1;
3193       if (dump_file && (dump_flags & TDF_DETAILS))
3194         {
3195           fprintf (dump_file, "force_expr_to_var_cost:\n");
3196           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3197           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3198           fprintf (dump_file, "  address %d\n", (int) address_cost);
3199           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3200           fprintf (dump_file, "\n");
3201         }
3202
3203       costs_initialized = true;
3204     }
3205
3206   STRIP_NOPS (expr);
3207
3208   if (SSA_VAR_P (expr))
3209     return zero_cost;
3210
3211   if (TREE_INVARIANT (expr))
3212     {
3213       if (TREE_CODE (expr) == INTEGER_CST)
3214         return new_cost (integer_cost, 0);
3215
3216       if (TREE_CODE (expr) == ADDR_EXPR)
3217         {
3218           tree obj = TREE_OPERAND (expr, 0);
3219
3220           if (TREE_CODE (obj) == VAR_DECL
3221               || TREE_CODE (obj) == PARM_DECL
3222               || TREE_CODE (obj) == RESULT_DECL)
3223             return new_cost (symbol_cost, 0);
3224         }
3225
3226       return new_cost (address_cost, 0);
3227     }
3228
3229   switch (TREE_CODE (expr))
3230     {
3231     case POINTER_PLUS_EXPR:
3232     case PLUS_EXPR:
3233     case MINUS_EXPR:
3234     case MULT_EXPR:
3235       op0 = TREE_OPERAND (expr, 0);
3236       op1 = TREE_OPERAND (expr, 1);
3237       STRIP_NOPS (op0);
3238       STRIP_NOPS (op1);
3239
3240       if (is_gimple_val (op0))
3241         cost0 = zero_cost;
3242       else
3243         cost0 = force_expr_to_var_cost (op0);
3244
3245       if (is_gimple_val (op1))
3246         cost1 = zero_cost;
3247       else
3248         cost1 = force_expr_to_var_cost (op1);
3249
3250       break;
3251
3252     default:
3253       /* Just an arbitrary value, FIXME.  */
3254       return new_cost (target_spill_cost, 0);
3255     }
3256
3257   mode = TYPE_MODE (TREE_TYPE (expr));
3258   switch (TREE_CODE (expr))
3259     {
3260     case POINTER_PLUS_EXPR:
3261     case PLUS_EXPR:
3262     case MINUS_EXPR:
3263       cost = new_cost (add_cost (mode), 0);
3264       break;
3265
3266     case MULT_EXPR:
3267       if (cst_and_fits_in_hwi (op0))
3268         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3269       else if (cst_and_fits_in_hwi (op1))
3270         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3271       else
3272         return new_cost (target_spill_cost, 0);
3273       break;
3274
3275     default:
3276       gcc_unreachable ();
3277     }
3278
3279   cost = add_costs (cost, cost0);
3280   cost = add_costs (cost, cost1);
3281
3282   /* Bound the cost by target_spill_cost.  The parts of complicated
3283      computations often are either loop invariant or at least can
3284      be shared between several iv uses, so letting this grow without
3285      limits would not give reasonable results.  */
3286   if (cost.cost > target_spill_cost)
3287     cost.cost = target_spill_cost;
3288
3289   return cost;
3290 }
3291
3292 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3293    invariants the computation depends on.  */
3294
3295 static comp_cost
3296 force_var_cost (struct ivopts_data *data,
3297                 tree expr, bitmap *depends_on)
3298 {
3299   if (depends_on)
3300     {
3301       fd_ivopts_data = data;
3302       walk_tree (&expr, find_depends, depends_on, NULL);
3303     }
3304
3305   return force_expr_to_var_cost (expr);
3306 }
3307
3308 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3309    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3310    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3311    invariants the computation depends on.  */
3312
3313 static comp_cost
3314 split_address_cost (struct ivopts_data *data,
3315                     tree addr, bool *symbol_present, bool *var_present,
3316                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3317 {
3318   tree core;
3319   HOST_WIDE_INT bitsize;
3320   HOST_WIDE_INT bitpos;
3321   tree toffset;
3322   enum machine_mode mode;
3323   int unsignedp, volatilep;
3324
3325   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3326                               &unsignedp, &volatilep, false);
3327
3328   if (toffset != 0
3329       || bitpos % BITS_PER_UNIT != 0
3330       || TREE_CODE (core) != VAR_DECL)
3331     {
3332       *symbol_present = false;
3333       *var_present = true;
3334       fd_ivopts_data = data;
3335       walk_tree (&addr, find_depends, depends_on, NULL);
3336       return new_cost (target_spill_cost, 0);
3337     }
3338
3339   *offset += bitpos / BITS_PER_UNIT;
3340   if (TREE_STATIC (core)
3341       || DECL_EXTERNAL (core))
3342     {
3343       *symbol_present = true;
3344       *var_present = false;
3345       return zero_cost;
3346     }
3347
3348   *symbol_present = false;
3349   *var_present = true;
3350   return zero_cost;
3351 }
3352
3353 /* Estimates cost of expressing difference of addresses E1 - E2 as
3354    var + symbol + offset.  The value of offset is added to OFFSET,
3355    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3356    part is missing.  DEPENDS_ON is a set of the invariants the computation
3357    depends on.  */
3358
3359 static comp_cost
3360 ptr_difference_cost (struct ivopts_data *data,
3361                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3362                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3363 {
3364   HOST_WIDE_INT diff = 0;
3365   comp_cost cost;
3366
3367   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3368
3369   if (ptr_difference_const (e1, e2, &diff))
3370     {
3371       *offset += diff;
3372       *symbol_present = false;
3373       *var_present = false;
3374       return zero_cost;
3375     }
3376
3377   if (integer_zerop (e2))
3378     return split_address_cost (data, TREE_OPERAND (e1, 0),
3379                                symbol_present, var_present, offset, depends_on);
3380
3381   *symbol_present = false;
3382   *var_present = true;
3383
3384   cost = force_var_cost (data, e1, depends_on);
3385   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3386   cost.cost += add_cost (Pmode);
3387
3388   return cost;
3389 }
3390
3391 /* Estimates cost of expressing difference E1 - E2 as
3392    var + symbol + offset.  The value of offset is added to OFFSET,
3393    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3394    part is missing.  DEPENDS_ON is a set of the invariants the computation
3395    depends on.  */
3396
3397 static comp_cost
3398 difference_cost (struct ivopts_data *data,
3399                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3400                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3401 {
3402   comp_cost cost;
3403   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3404   unsigned HOST_WIDE_INT off1, off2;
3405
3406   e1 = strip_offset (e1, &off1);
3407   e2 = strip_offset (e2, &off2);
3408   *offset += off1 - off2;
3409
3410   STRIP_NOPS (e1);
3411   STRIP_NOPS (e2);
3412
3413   if (TREE_CODE (e1) == ADDR_EXPR)
3414     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3415                                 depends_on);
3416   *symbol_present = false;
3417
3418   if (operand_equal_p (e1, e2, 0))
3419     {
3420       *var_present = false;
3421       return zero_cost;
3422     }
3423   *var_present = true;
3424   if (integer_zerop (e2))
3425     return force_var_cost (data, e1, depends_on);
3426
3427   if (integer_zerop (e1))
3428     {
3429       cost = force_var_cost (data, e2, depends_on);
3430       cost.cost += multiply_by_cost (-1, mode);
3431
3432       return cost;
3433     }
3434
3435   cost = force_var_cost (data, e1, depends_on);
3436   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3437   cost.cost += add_cost (mode);
3438
3439   return cost;
3440 }
3441
3442 /* Determines the cost of the computation by that USE is expressed
3443    from induction variable CAND.  If ADDRESS_P is true, we just need
3444    to create an address from it, otherwise we want to get it into
3445    register.  A set of invariants we depend on is stored in
3446    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3447
3448 static comp_cost
3449 get_computation_cost_at (struct ivopts_data *data,
3450                          struct iv_use *use, struct iv_cand *cand,
3451                          bool address_p, bitmap *depends_on, tree at)
3452 {
3453   tree ubase = use->iv->base, ustep = use->iv->step;
3454   tree cbase, cstep;
3455   tree utype = TREE_TYPE (ubase), ctype;
3456   unsigned HOST_WIDE_INT cstepi, offset = 0;
3457   HOST_WIDE_INT ratio, aratio;
3458   bool var_present, symbol_present;
3459   comp_cost cost;
3460   unsigned n_sums;
3461   double_int rat;
3462
3463   *depends_on = NULL;
3464
3465   /* Only consider real candidates.  */
3466   if (!cand->iv)
3467     return infinite_cost;
3468
3469   cbase = cand->iv->base;
3470   cstep = cand->iv->step;
3471   ctype = TREE_TYPE (cbase);
3472
3473   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3474     {
3475       /* We do not have a precision to express the values of use.  */
3476       return infinite_cost;
3477     }
3478
3479   if (address_p)
3480     {
3481       /* Do not try to express address of an object with computation based
3482          on address of a different object.  This may cause problems in rtl
3483          level alias analysis (that does not expect this to be happening,
3484          as this is illegal in C), and would be unlikely to be useful
3485          anyway.  */
3486       if (use->iv->base_object
3487           && cand->iv->base_object
3488           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3489         return infinite_cost;
3490     }
3491
3492   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3493     {
3494       /* TODO -- add direct handling of this case.  */
3495       goto fallback;
3496     }
3497
3498   /* CSTEPI is removed from the offset in case statement is after the
3499      increment.  If the step is not constant, we use zero instead.
3500      This is a bit imprecise (there is the extra addition), but
3501      redundancy elimination is likely to transform the code so that
3502      it uses value of the variable before increment anyway,
3503      so it is not that much unrealistic.  */
3504   if (cst_and_fits_in_hwi (cstep))
3505     cstepi = int_cst_value (cstep);
3506   else
3507     cstepi = 0;
3508
3509   if (!constant_multiple_of (ustep, cstep, &rat))
3510     return infinite_cost;
3511
3512   if (double_int_fits_in_shwi_p (rat))
3513     ratio = double_int_to_shwi (rat);
3514   else
3515     return infinite_cost;
3516
3517   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3518      or ratio == 1, it is better to handle this like
3519
3520      ubase - ratio * cbase + ratio * var
3521
3522      (also holds in the case ratio == -1, TODO.  */
3523
3524   if (cst_and_fits_in_hwi (cbase))
3525     {
3526       offset = - ratio * int_cst_value (cbase);
3527       cost = difference_cost (data,
3528                               ubase, build_int_cst (utype, 0),
3529                               &symbol_present, &var_present, &offset,
3530                               depends_on);
3531     }
3532   else if (ratio == 1)
3533     {
3534       cost = difference_cost (data,
3535                               ubase, cbase,
3536                               &symbol_present, &var_present, &offset,
3537                               depends_on);
3538     }
3539   else
3540     {
3541       cost = force_var_cost (data, cbase, depends_on);
3542       cost.cost += add_cost (TYPE_MODE (ctype));
3543       cost = add_costs (cost,
3544                         difference_cost (data,
3545                                          ubase, build_int_cst (utype, 0),
3546                                          &symbol_present, &var_present,
3547                                          &offset, depends_on));
3548     }
3549
3550   /* If we are after the increment, the value of the candidate is higher by
3551      one iteration.  */
3552   if (stmt_after_increment (data->current_loop, cand, at))
3553     offset -= ratio * cstepi;
3554
3555   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3556      (symbol/var/const parts may be omitted).  If we are looking for an address,
3557      find the cost of addressing this.  */
3558   if (address_p)
3559     return add_costs (cost, get_address_cost (symbol_present, var_present,
3560                                 offset, ratio,
3561                                 TYPE_MODE (TREE_TYPE (*use->op_p))));
3562
3563   /* Otherwise estimate the costs for computing the expression.  */
3564   aratio = ratio > 0 ? ratio : -ratio;
3565   if (!symbol_present && !var_present && !offset)
3566     {
3567       if (ratio != 1)
3568         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3569
3570       return cost;
3571     }
3572
3573   if (aratio != 1)
3574     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3575
3576   n_sums = 1;
3577   if (var_present
3578       /* Symbol + offset should be compile-time computable.  */
3579       && (symbol_present || offset))
3580     n_sums++;
3581
3582   /* Having offset does not affect runtime cost in case it is added to
3583      symbol, but it increases complexity.  */
3584   if (offset)
3585     cost.complexity++;
3586
3587   cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3588   return cost;
3589
3590 fallback:
3591   {
3592     /* Just get the expression, expand it and measure the cost.  */
3593     tree comp = get_computation_at (data->current_loop, use, cand, at);
3594
3595     if (!comp)
3596       return infinite_cost;
3597
3598     if (address_p)
3599       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3600
3601     return new_cost (computation_cost (comp), 0);
3602   }
3603 }
3604
3605 /* Determines the cost of the computation by that USE is expressed
3606    from induction variable CAND.  If ADDRESS_P is true, we just need
3607    to create an address from it, otherwise we want to get it into
3608    register.  A set of invariants we depend on is stored in
3609    DEPENDS_ON.  */
3610
3611 static comp_cost
3612 get_computation_cost (struct ivopts_data *data,
3613                       struct iv_use *use, struct iv_cand *cand,
3614                       bool address_p, bitmap *depends_on)
3615 {
3616   return get_computation_cost_at (data,
3617                                   use, cand, address_p, depends_on, use->stmt);
3618 }
3619
3620 /* Determines cost of basing replacement of USE on CAND in a generic
3621    expression.  */
3622
3623 static bool
3624 determine_use_iv_cost_generic (struct ivopts_data *data,
3625                                struct iv_use *use, struct iv_cand *cand)
3626 {
3627   bitmap depends_on;
3628   comp_cost cost;
3629
3630   /* The simple case first -- if we need to express value of the preserved
3631      original biv, the cost is 0.  This also prevents us from counting the
3632      cost of increment twice -- once at this use and once in the cost of
3633      the candidate.  */
3634   if (cand->pos == IP_ORIGINAL
3635       && cand->incremented_at == use->stmt)
3636     {
3637       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3638       return true;
3639     }
3640
3641   cost = get_computation_cost (data, use, cand, false, &depends_on);
3642   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3643
3644   return !infinite_cost_p (cost);
3645 }
3646
3647 /* Determines cost of basing replacement of USE on CAND in an address.  */
3648
3649 static bool
3650 determine_use_iv_cost_address (struct ivopts_data *data,
3651                                struct iv_use *use, struct iv_cand *cand)
3652 {
3653   bitmap depends_on;
3654   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3655
3656   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3657
3658   return !infinite_cost_p (cost);
3659 }
3660
3661 /* Computes value of candidate CAND at position AT in iteration NITER, and
3662    stores it to VAL.  */
3663
3664 static void
3665 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3666                aff_tree *val)
3667 {
3668   aff_tree step, delta, nit;
3669   struct iv *iv = cand->iv;
3670   tree type = TREE_TYPE (iv->base);
3671
3672   tree_to_aff_combination (iv->step, type, &step);
3673   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3674   aff_combination_convert (&nit, type);
3675   aff_combination_mult (&nit, &step, &delta);
3676   if (stmt_after_increment (loop, cand, at))
3677     aff_combination_add (&delta, &step);
3678
3679   tree_to_aff_combination (iv->base, type, val);
3680   aff_combination_add (val, &delta);
3681 }
3682
3683 /* Returns period of induction variable iv.  */
3684
3685 static tree
3686 iv_period (struct iv *iv)
3687 {
3688   tree step = iv->step, period, type;
3689   tree pow2div;
3690
3691   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3692
3693   /* Period of the iv is gcd (step, type range).  Since type range is power
3694      of two, it suffices to determine the maximum power of two that divides
3695      step.  */
3696   pow2div = num_ending_zeros (step);
3697   type = unsigned_type_for (TREE_TYPE (step));
3698
3699   period = build_low_bits_mask (type,
3700                                 (TYPE_PRECISION (type)
3701                                  - tree_low_cst (pow2div, 1)));
3702
3703   return period;
3704 }
3705
3706 /* Returns the comparison operator used when eliminating the iv USE.  */
3707
3708 static enum tree_code
3709 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3710 {
3711   struct loop *loop = data->current_loop;
3712   basic_block ex_bb;
3713   edge exit;
3714
3715   ex_bb = bb_for_stmt (use->stmt);
3716   exit = EDGE_SUCC (ex_bb, 0);
3717   if (flow_bb_inside_loop_p (loop, exit->dest))
3718     exit = EDGE_SUCC (ex_bb, 1);
3719
3720   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3721 }
3722
3723 /* Check whether it is possible to express the condition in USE by comparison
3724    of candidate CAND.  If so, store the value compared with to BOUND.  */
3725
3726 static bool
3727 may_eliminate_iv (struct ivopts_data *data,
3728                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3729 {
3730   basic_block ex_bb;
3731   edge exit;
3732   tree nit, period;
3733   struct loop *loop = data->current_loop;
3734   aff_tree bnd;
3735   double_int period_value, max_niter;
3736
3737   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3738     return false;
3739
3740   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3741      for other conditions inside loop body.  */
3742   ex_bb = bb_for_stmt (use->stmt);
3743   if (use->stmt != last_stmt (ex_bb)
3744       || TREE_CODE (use->stmt) != COND_EXPR)
3745     return false;
3746   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3747     return false;
3748
3749   exit = EDGE_SUCC (ex_bb, 0);
3750   if (flow_bb_inside_loop_p (loop, exit->dest))
3751     exit = EDGE_SUCC (ex_bb, 1);
3752   if (flow_bb_inside_loop_p (loop, exit->dest))
3753     return false;
3754
3755   nit = niter_for_exit (data, exit);
3756   if (!nit)
3757     return false;
3758
3759   /* Determine whether we may use the variable to test whether niter iterations
3760      elapsed.  This is the case iff the period of the induction variable is
3761      greater than the number of iterations.  */
3762   period = iv_period (cand->iv);
3763   if (!period)
3764     return false;
3765
3766   /* Compare the period with the estimate on the number of iterations of the
3767      loop.  */
3768   if (!estimated_loop_iterations (loop, true, &max_niter))
3769     return false;
3770   period_value = tree_to_double_int (period);
3771   if (double_int_ucmp (period_value, max_niter) <= 0)
3772     return false;
3773
3774   cand_value_at (loop, cand, use->stmt, nit, &bnd);
3775   *bound = aff_combination_to_tree (&bnd);
3776   return true;
3777 }
3778
3779 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3780
3781 static bool
3782 determine_use_iv_cost_condition (struct ivopts_data *data,
3783                                  struct iv_use *use, struct iv_cand *cand)
3784 {
3785   tree bound = NULL_TREE;
3786   struct iv *cmp_iv;
3787   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3788   comp_cost elim_cost, express_cost, cost;
3789   bool ok;
3790
3791   /* Only consider real candidates.  */
3792   if (!cand->iv)
3793     {
3794       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3795       return false;
3796     }
3797
3798   /* Try iv elimination.  */
3799   if (may_eliminate_iv (data, use, cand, &bound))
3800     {
3801       elim_cost = force_var_cost (data, bound, &depends_on_elim);
3802       /* The bound is a loop invariant, so it will be only computed
3803          once.  */
3804       elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3805     }
3806   else
3807     elim_cost = infinite_cost;
3808
3809   /* Try expressing the original giv.  If it is compared with an invariant,
3810      note that we cannot get rid of it.  */
3811   ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3812   gcc_assert (ok);
3813
3814   express_cost = get_computation_cost (data, use, cand, false,
3815                                        &depends_on_express);
3816   fd_ivopts_data = data;
3817   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3818
3819   /* Choose the better approach.  */
3820   if (compare_costs (elim_cost, express_cost) < 0)
3821     {
3822       cost = elim_cost;
3823       depends_on = depends_on_elim;
3824       depends_on_elim = NULL;
3825     }
3826   else
3827     {
3828       cost = express_cost;
3829       depends_on = depends_on_express;
3830       depends_on_express = NULL;
3831       bound = NULL_TREE;
3832     }
3833
3834   set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3835
3836   if (depends_on_elim)
3837     BITMAP_FREE (depends_on_elim);
3838   if (depends_on_express)
3839     BITMAP_FREE (depends_on_express);
3840
3841   return !infinite_cost_p (cost);
3842 }
3843
3844 /* Determines cost of basing replacement of USE on CAND.  Returns false
3845    if USE cannot be based on CAND.  */
3846
3847 static bool
3848 determine_use_iv_cost (struct ivopts_data *data,
3849                        struct iv_use *use, struct iv_cand *cand)
3850 {
3851   switch (use->type)
3852     {
3853     case USE_NONLINEAR_EXPR:
3854       return determine_use_iv_cost_generic (data, use, cand);
3855
3856     case USE_ADDRESS:
3857       return determine_use_iv_cost_address (data, use, cand);
3858
3859     case USE_COMPARE:
3860       return determine_use_iv_cost_condition (data, use, cand);
3861
3862     default:
3863       gcc_unreachable ();
3864     }
3865 }
3866
3867 /* Determines costs of basing the use of the iv on an iv candidate.  */
3868
3869 static void
3870 determine_use_iv_costs (struct ivopts_data *data)
3871 {
3872   unsigned i, j;
3873   struct iv_use *use;
3874   struct iv_cand *cand;
3875   bitmap to_clear = BITMAP_ALLOC (NULL);
3876
3877   alloc_use_cost_map (data);
3878
3879   for (i = 0; i < n_iv_uses (data); i++)
3880     {
3881       use = iv_use (data, i);
3882
3883       if (data->consider_all_candidates)
3884         {
3885           for (j = 0; j < n_iv_cands (data); j++)
3886             {
3887               cand = iv_cand (data, j);
3888               determine_use_iv_cost (data, use, cand);
3889             }
3890         }
3891       else
3892         {
3893           bitmap_iterator bi;
3894
3895           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3896             {
3897               cand = iv_cand (data, j);
3898               if (!determine_use_iv_cost (data, use, cand))
3899                 bitmap_set_bit (to_clear, j);
3900             }
3901
3902           /* Remove the candidates for that the cost is infinite from
3903              the list of related candidates.  */
3904           bitmap_and_compl_into (use->related_cands, to_clear);
3905           bitmap_clear (to_clear);
3906         }
3907     }
3908
3909   BITMAP_FREE (to_clear);
3910
3911   if (dump_file && (dump_flags & TDF_DETAILS))
3912     {
3913       fprintf (dump_file, "Use-candidate costs:\n");
3914
3915       for (i = 0; i < n_iv_uses (data); i++)
3916         {
3917           use = iv_use (data, i);
3918
3919           fprintf (dump_file, "Use %d:\n", i);
3920           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
3921           for (j = 0; j < use->n_map_members; j++)
3922             {
3923               if (!use->cost_map[j].cand
3924                   || infinite_cost_p (use->cost_map[j].cost))
3925                 continue;
3926
3927               fprintf (dump_file, "  %d\t%d\t%d\t",
3928                        use->cost_map[j].cand->id,
3929                        use->cost_map[j].cost.cost,
3930                        use->cost_map[j].cost.complexity);
3931               if (use->cost_map[j].depends_on)
3932                 bitmap_print (dump_file,
3933                               use->cost_map[j].depends_on, "","");
3934               fprintf (dump_file, "\n");
3935             }
3936
3937           fprintf (dump_file, "\n");
3938         }
3939       fprintf (dump_file, "\n");
3940     }
3941 }
3942
3943 /* Determines cost of the candidate CAND.  */
3944
3945 static void
3946 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3947 {
3948   comp_cost cost_base;
3949   unsigned cost, cost_step;
3950   tree base;
3951
3952   if (!cand->iv)
3953     {
3954       cand->cost = 0;
3955       return;
3956     }
3957
3958   /* There are two costs associated with the candidate -- its increment
3959      and its initialization.  The second is almost negligible for any loop
3960      that rolls enough, so we take it just very little into account.  */
3961
3962   base = cand->iv->base;
3963   cost_base = force_var_cost (data, base, NULL);
3964   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3965
3966   cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
3967
3968   /* Prefer the original ivs unless we may gain something by replacing it.
3969      The reason is to makee debugging simpler; so this is not relevant for
3970      artificial ivs created by other optimization passes.  */
3971   if (cand->pos != IP_ORIGINAL
3972       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3973     cost++;
3974
3975   /* Prefer not to insert statements into latch unless there are some
3976      already (so that we do not create unnecessary jumps).  */
3977   if (cand->pos == IP_END
3978       && empty_block_p (ip_end_pos (data->current_loop)))
3979     cost++;
3980
3981   cand->cost = cost;
3982 }
3983
3984 /* Determines costs of computation of the candidates.  */
3985
3986 static void
3987 determine_iv_costs (struct ivopts_data *data)
3988 {
3989   unsigned i;
3990
3991   if (dump_file && (dump_flags & TDF_DETAILS))
3992     {
3993       fprintf (dump_file, "Candidate costs:\n");
3994       fprintf (dump_file, "  cand\tcost\n");
3995     }
3996
3997   for (i = 0; i < n_iv_cands (data); i++)
3998     {
3999       struct iv_cand *cand = iv_cand (data, i);
4000
4001       determine_iv_cost (data, cand);
4002
4003       if (dump_file && (dump_flags & TDF_DETAILS))
4004         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4005     }
4006
4007   if (dump_file && (dump_flags & TDF_DETAILS))
4008     fprintf (dump_file, "\n");
4009 }
4010
4011 /* Calculates cost for having SIZE induction variables.  */
4012
4013 static unsigned
4014 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4015 {
4016   /* We add size to the cost, so that we prefer eliminating ivs
4017      if possible.  */
4018   return size + estimate_reg_pressure_cost (size, data->regs_used);
4019 }
4020
4021 /* For each size of the induction variable set determine the penalty.  */
4022
4023 static void
4024 determine_set_costs (struct ivopts_data *data)
4025 {
4026   unsigned j, n;
4027   tree phi, op;
4028   struct loop *loop = data->current_loop;
4029   bitmap_iterator bi;
4030
4031   /* We use the following model (definitely improvable, especially the
4032      cost function -- TODO):
4033
4034      We estimate the number of registers available (using MD data), name it A.
4035
4036      We estimate the number of registers used by the loop, name it U.  This
4037      number is obtained as the number of loop phi nodes (not counting virtual
4038      registers and bivs) + the number of variables from outside of the loop.
4039
4040      We set a reserve R (free regs that are used for temporary computations,
4041      etc.).  For now the reserve is a constant 3.
4042
4043      Let I be the number of induction variables.
4044
4045      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4046         make a lot of ivs without a reason).
4047      -- if A - R < U + I <= A, the cost is I * PRES_COST
4048      -- if U + I > A, the cost is I * PRES_COST and
4049         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4050
4051   if (dump_file && (dump_flags & TDF_DETAILS))
4052     {
4053       fprintf (dump_file, "Global costs:\n");
4054       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4055       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost);
4056       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4057     }
4058
4059   n = 0;
4060   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4061     {
4062       op = PHI_RESULT (phi);
4063
4064       if (!is_gimple_reg (op))
4065         continue;
4066
4067       if (get_iv (data, op))
4068         continue;
4069
4070       n++;
4071     }
4072
4073   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4074     {
4075       struct version_info *info = ver_info (data, j);
4076
4077       if (info->inv_id && info->has_nonlin_use)
4078         n++;
4079     }
4080
4081   data->regs_used = n;
4082   if (dump_file && (dump_flags & TDF_DETAILS))
4083     fprintf (dump_file, "  regs_used %d\n", n);
4084
4085   if (dump_file && (dump_flags & TDF_DETAILS))
4086     {
4087       fprintf (dump_file, "  cost for size:\n");
4088       fprintf (dump_file, "  ivs\tcost\n");
4089       for (j = 0; j <= 2 * target_avail_regs; j++)
4090         fprintf (dump_file, "  %d\t%d\n", j,
4091                  ivopts_global_cost_for_size (data, j));
4092       fprintf (dump_file, "\n");
4093     }
4094 }
4095
4096 /* Returns true if A is a cheaper cost pair than B.  */
4097
4098 static bool
4099 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4100 {
4101   int cmp;
4102
4103   if (!a)
4104     return false;
4105
4106   if (!b)
4107     return true;
4108
4109   cmp = compare_costs (a->cost, b->cost);
4110   if (cmp < 0)
4111     return true;
4112
4113   if (cmp > 0)
4114     return false;
4115
4116   /* In case the costs are the same, prefer the cheaper candidate.  */
4117   if (a->cand->cost < b->cand->cost)
4118     return true;
4119
4120   return false;
4121 }
4122
4123 /* Computes the cost field of IVS structure.  */
4124
4125 static void
4126 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4127 {
4128   comp_cost cost = ivs->cand_use_cost;
4129   cost.cost += ivs->cand_cost;
4130   cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4131
4132   ivs->cost = cost;
4133 }
4134
4135 /* Remove invariants in set INVS to set IVS.  */
4136
4137 static void
4138 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4139 {
4140   bitmap_iterator bi;
4141   unsigned iid;
4142
4143   if (!invs)
4144     return;
4145
4146   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4147     {
4148       ivs->n_invariant_uses[iid]--;
4149       if (ivs->n_invariant_uses[iid] == 0)
4150         ivs->n_regs--;
4151     }
4152 }
4153
4154 /* Set USE not to be expressed by any candidate in IVS.  */
4155
4156 static void
4157 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4158                  struct iv_use *use)
4159 {
4160   unsigned uid = use->id, cid;
4161   struct cost_pair *cp;
4162
4163   cp = ivs->cand_for_use[uid];
4164   if (!cp)
4165     return;
4166   cid = cp->cand->id;
4167
4168   ivs->bad_uses++;
4169   ivs->cand_for_use[uid] = NULL;
4170   ivs->n_cand_uses[cid]--;
4171
4172   if (ivs->n_cand_uses[cid] == 0)
4173     {
4174       bitmap_clear_bit (ivs->cands, cid);
4175       /* Do not count the pseudocandidates.  */
4176       if (cp->cand->iv)
4177         ivs->n_regs--;
4178       ivs->n_cands--;
4179       ivs->cand_cost -= cp->cand->cost;
4180
4181       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4182     }
4183
4184   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4185
4186   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4187   iv_ca_recount_cost (data, ivs);
4188 }
4189
4190 /* Add invariants in set INVS to set IVS.  */
4191
4192 static void
4193 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4194 {
4195   bitmap_iterator bi;
4196   unsigned iid;
4197
4198   if (!invs)
4199     return;
4200
4201   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4202     {
4203       ivs->n_invariant_uses[iid]++;
4204       if (ivs->n_invariant_uses[iid] == 1)
4205         ivs->n_regs++;
4206     }
4207 }
4208
4209 /* Set cost pair for USE in set IVS to CP.  */
4210
4211 static void
4212 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4213               struct iv_use *use, struct cost_pair *cp)
4214 {
4215   unsigned uid = use->id, cid;
4216
4217   if (ivs->cand_for_use[uid] == cp)
4218     return;
4219
4220   if (ivs->cand_for_use[uid])
4221     iv_ca_set_no_cp (data, ivs, use);
4222
4223   if (cp)
4224     {
4225       cid = cp->cand->id;
4226
4227       ivs->bad_uses--;
4228       ivs->cand_for_use[uid] = cp;
4229       ivs->n_cand_uses[cid]++;
4230       if (ivs->n_cand_uses[cid] == 1)
4231         {
4232           bitmap_set_bit (ivs->cands, cid);
4233           /* Do not count the pseudocandidates.  */
4234           if (cp->cand->iv)
4235             ivs->n_regs++;
4236           ivs->n_cands++;
4237           ivs->cand_cost += cp->cand->cost;
4238
4239           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4240         }
4241
4242       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4243       iv_ca_set_add_invariants (ivs, cp->depends_on);
4244       iv_ca_recount_cost (data, ivs);
4245     }
4246 }
4247
4248 /* Extend set IVS by expressing USE by some of the candidates in it
4249    if possible.  */
4250
4251 static void
4252 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4253                struct iv_use *use)
4254 {
4255   struct cost_pair *best_cp = NULL, *cp;
4256   bitmap_iterator bi;
4257   unsigned i;
4258
4259   gcc_assert (ivs->upto >= use->id);
4260
4261   if (ivs->upto == use->id)
4262     {
4263       ivs->upto++;
4264       ivs->bad_uses++;
4265     }
4266
4267   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4268     {
4269       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4270
4271       if (cheaper_cost_pair (cp, best_cp))
4272         best_cp = cp;
4273     }
4274
4275   iv_ca_set_cp (data, ivs, use, best_cp);
4276 }
4277
4278 /* Get cost for assignment IVS.  */
4279
4280 static comp_cost
4281 iv_ca_cost (struct iv_ca *ivs)
4282 {
4283   return (ivs->bad_uses ? infinite_cost : ivs->cost);
4284 }
4285
4286 /* Returns true if all dependences of CP are among invariants in IVS.  */
4287
4288 static bool
4289 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4290 {
4291   unsigned i;
4292   bitmap_iterator bi;
4293
4294   if (!cp->depends_on)
4295     return true;
4296
4297   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4298     {
4299       if (ivs->n_invariant_uses[i] == 0)
4300         return false;
4301     }
4302
4303   return true;
4304 }
4305
4306 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4307    it before NEXT_CHANGE.  */
4308
4309 static struct iv_ca_delta *
4310 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4311                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4312 {
4313   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4314
4315   change->use = use;
4316   change->old_cp = old_cp;
4317   change->new_cp = new_cp;
4318   change->next_change = next_change;
4319
4320   return change;
4321 }
4322
4323 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4324    are rewritten.  */
4325
4326 static struct iv_ca_delta *
4327 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4328 {
4329   struct iv_ca_delta *last;
4330
4331   if (!l2)
4332     return l1;
4333
4334   if (!l1)
4335     return l2;
4336
4337   for (last = l1; last->next_change; last = last->next_change)
4338     continue;
4339   last->next_change = l2;
4340
4341   return l1;
4342 }
4343
4344 /* Returns candidate by that USE is expressed in IVS.  */
4345
4346 static struct cost_pair *
4347 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4348 {
4349   return ivs->cand_for_use[use->id];
4350 }
4351
4352 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4353
4354 static struct iv_ca_delta *
4355 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4356 {
4357   struct iv_ca_delta *act, *next, *prev = NULL;
4358   struct cost_pair *tmp;
4359
4360   for (act = delta; act; act = next)
4361     {
4362       next = act->next_change;
4363       act->next_change = prev;
4364       prev = act;
4365
4366       tmp = act->old_cp;
4367       act->old_cp = act->new_cp;
4368       act->new_cp = tmp;
4369     }
4370
4371   return prev;
4372 }
4373
4374 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4375    reverted instead.  */
4376
4377 static void
4378 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4379                     struct iv_ca_delta *delta, bool forward)
4380 {
4381   struct cost_pair *from, *to;
4382   struct iv_ca_delta *act;
4383
4384   if (!forward)
4385     delta = iv_ca_delta_reverse (delta);
4386
4387   for (act = delta; act; act = act->next_change)
4388     {
4389       from = act->old_cp;
4390       to = act->new_cp;
4391       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4392       iv_ca_set_cp (data, ivs, act->use, to);
4393     }
4394
4395   if (!forward)
4396     iv_ca_delta_reverse (delta);
4397 }
4398
4399 /* Returns true if CAND is used in IVS.  */
4400
4401 static bool
4402 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4403 {
4404   return ivs->n_cand_uses[cand->id] > 0;
4405 }
4406
4407 /* Returns number of induction variable candidates in the set IVS.  */
4408
4409 static unsigned
4410 iv_ca_n_cands (struct iv_ca *ivs)
4411 {
4412   return ivs->n_cands;
4413 }
4414
4415 /* Free the list of changes DELTA.  */
4416
4417 static void
4418 iv_ca_delta_free (struct iv_ca_delta **delta)
4419 {
4420   struct iv_ca_delta *act, *next;
4421
4422   for (act = *delta; act; act = next)
4423     {
4424       next = act->next_change;
4425       free (act);
4426     }
4427
4428   *delta = NULL;
4429 }
4430
4431 /* Allocates new iv candidates assignment.  */
4432
4433 static struct iv_ca *
4434 iv_ca_new (struct ivopts_data *data)
4435 {
4436   struct iv_ca *nw = XNEW (struct iv_ca);
4437
4438   nw->upto = 0;
4439   nw->bad_uses = 0;
4440   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4441   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4442   nw->cands = BITMAP_ALLOC (NULL);
4443   nw->n_cands = 0;
4444   nw->n_regs = 0;
4445   nw->cand_use_cost = zero_cost;
4446   nw->cand_cost = 0;
4447   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4448   nw->cost = zero_cost;
4449
4450   return nw;
4451 }
4452
4453 /* Free memory occupied by the set IVS.  */
4454
4455 static void
4456 iv_ca_free (struct iv_ca **ivs)
4457 {
4458   free ((*ivs)->cand_for_use);
4459   free ((*ivs)->n_cand_uses);
4460   BITMAP_FREE ((*ivs)->cands);
4461   free ((*ivs)->n_invariant_uses);
4462   free (*ivs);
4463   *ivs = NULL;
4464 }
4465
4466 /* Dumps IVS to FILE.  */
4467
4468 static void
4469 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4470 {
4471   const char *pref = "  invariants ";
4472   unsigned i;
4473   comp_cost cost = iv_ca_cost (ivs);
4474
4475   fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
4476   bitmap_print (file, ivs->cands, "  candidates ","\n");
4477
4478   for (i = 1; i <= data->max_inv_id; i++)
4479     if (ivs->n_invariant_uses[i])
4480       {
4481         fprintf (file, "%s%d", pref, i);
4482         pref = ", ";
4483       }
4484   fprintf (file, "\n");
4485 }
4486
4487 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4488    new set, and store differences in DELTA.  Number of induction variables
4489    in the new set is stored to N_IVS.  */
4490
4491 static comp_cost
4492 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4493               struct iv_cand *cand, struct iv_ca_delta **delta,
4494               unsigned *n_ivs)
4495 {
4496   unsigned i;
4497   comp_cost cost;
4498   struct iv_use *use;
4499   struct cost_pair *old_cp, *new_cp;
4500
4501   *delta = NULL;
4502   for (i = 0; i < ivs->upto; i++)
4503     {
4504       use = iv_use (data, i);
4505       old_cp = iv_ca_cand_for_use (ivs, use);
4506
4507       if (old_cp
4508           && old_cp->cand == cand)
4509         continue;
4510
4511       new_cp = get_use_iv_cost (data, use, cand);
4512       if (!new_cp)
4513         continue;
4514
4515       if (!iv_ca_has_deps (ivs, new_cp))
4516         continue;
4517
4518       if (!cheaper_cost_pair (new_cp, old_cp))
4519         continue;
4520
4521       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4522     }
4523
4524   iv_ca_delta_commit (data, ivs, *delta, true);
4525   cost = iv_ca_cost (ivs);
4526   if (n_ivs)
4527     *n_ivs = iv_ca_n_cands (ivs);
4528   iv_ca_delta_commit (data, ivs, *delta, false);
4529
4530   return cost;
4531 }
4532
4533 /* Try narrowing set IVS by removing CAND.  Return the cost of
4534    the new set and store the differences in DELTA.  */
4535
4536 static comp_cost
4537 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4538               struct iv_cand *cand, struct iv_ca_delta **delta)
4539 {
4540   unsigned i, ci;
4541   struct iv_use *use;
4542   struct cost_pair *old_cp, *new_cp, *cp;
4543   bitmap_iterator bi;
4544   struct iv_cand *cnd;
4545   comp_cost cost;
4546
4547   *delta = NULL;
4548   for (i = 0; i < n_iv_uses (data); i++)
4549     {
4550       use = iv_use (data, i);
4551
4552       old_cp = iv_ca_cand_for_use (ivs, use);
4553       if (old_cp->cand != cand)
4554         continue;
4555
4556       new_cp = NULL;
4557
4558       if (data->consider_all_candidates)
4559         {
4560           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4561             {
4562               if (ci == cand->id)
4563                 continue;
4564
4565               cnd = iv_cand (data, ci);
4566
4567               cp = get_use_iv_cost (data, use, cnd);
4568               if (!cp)
4569                 continue;
4570               if (!iv_ca_has_deps (ivs, cp))
4571                 continue;
4572
4573               if (!cheaper_cost_pair (cp, new_cp))
4574                 continue;
4575
4576               new_cp = cp;
4577             }
4578         }
4579       else
4580         {
4581           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4582             {
4583               if (ci == cand->id)
4584                 continue;
4585
4586               cnd = iv_cand (data, ci);
4587
4588               cp = get_use_iv_cost (data, use, cnd);
4589               if (!cp)
4590                 continue;
4591               if (!iv_ca_has_deps (ivs, cp))
4592                 continue;
4593
4594               if (!cheaper_cost_pair (cp, new_cp))
4595                 continue;
4596
4597               new_cp = cp;
4598             }
4599         }
4600
4601       if (!new_cp)
4602         {
4603           iv_ca_delta_free (delta);
4604           return infinite_cost;
4605         }
4606
4607       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4608     }
4609
4610   iv_ca_delta_commit (data, ivs, *delta, true);
4611   cost = iv_ca_cost (ivs);
4612   iv_ca_delta_commit (data, ivs, *delta, false);
4613
4614   return cost;
4615 }
4616
4617 /* Try optimizing the set of candidates IVS by removing candidates different
4618    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4619    differences in DELTA.  */
4620
4621 static comp_cost
4622 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4623              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4624 {
4625   bitmap_iterator bi;
4626   struct iv_ca_delta *act_delta, *best_delta;
4627   unsigned i;
4628   comp_cost best_cost, acost;
4629   struct iv_cand *cand;
4630
4631   best_delta = NULL;
4632   best_cost = iv_ca_cost (ivs);
4633
4634   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4635     {
4636       cand = iv_cand (data, i);
4637
4638       if (cand == except_cand)
4639         continue;
4640
4641       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4642
4643       if (compare_costs (acost, best_cost) < 0)
4644         {
4645           best_cost = acost;
4646           iv_ca_delta_free (&best_delta);
4647           best_delta = act_delta;
4648         }
4649       else
4650         iv_ca_delta_free (&act_delta);
4651     }
4652
4653   if (!best_delta)
4654     {
4655       *delta = NULL;
4656       return best_cost;
4657     }
4658
4659   /* Recurse to possibly remove other unnecessary ivs.  */
4660   iv_ca_delta_commit (data, ivs, best_delta, true);
4661   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4662   iv_ca_delta_commit (data, ivs, best_delta, false);
4663   *delta = iv_ca_delta_join (best_delta, *delta);
4664   return best_cost;
4665 }
4666
4667 /* Tries to extend the sets IVS in the best possible way in order
4668    to express the USE.  */
4669
4670 static bool
4671 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4672                   struct iv_use *use)
4673 {
4674   comp_cost best_cost, act_cost;
4675   unsigned i;
4676   bitmap_iterator bi;
4677   struct iv_cand *cand;
4678   struct iv_ca_delta *best_delta = NULL, *act_delta;
4679   struct cost_pair *cp;
4680
4681   iv_ca_add_use (data, ivs, use);
4682   best_cost = iv_ca_cost (ivs);
4683
4684   cp = iv_ca_cand_for_use (ivs, use);
4685   if (cp)
4686     {
4687       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4688       iv_ca_set_no_cp (data, ivs, use);
4689     }
4690
4691   /* First try important candidates not based on any memory object.  Only if
4692      this fails, try the specific ones.  Rationale -- in loops with many
4693      variables the best choice often is to use just one generic biv.  If we
4694      added here many ivs specific to the uses, the optimization algorithm later
4695      would be likely to get stuck in a local minimum, thus causing us to create
4696      too many ivs.  The approach from few ivs to more seems more likely to be
4697      successful -- starting from few ivs, replacing an expensive use by a
4698      specific iv should always be a win.  */
4699   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4700     {
4701       cand = iv_cand (data, i);
4702
4703       if (cand->iv->base_object != NULL_TREE)
4704         continue;
4705
4706       if (iv_ca_cand_used_p (ivs, cand))
4707         continue;
4708
4709       cp = get_use_iv_cost (data, use, cand);
4710       if (!cp)
4711         continue;
4712
4713       iv_ca_set_cp (data, ivs, use, cp);
4714       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4715       iv_ca_set_no_cp (data, ivs, use);
4716       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4717
4718       if (compare_costs (act_cost, best_cost) < 0)
4719         {
4720           best_cost = act_cost;
4721
4722           iv_ca_delta_free (&best_delta);
4723           best_delta = act_delta;
4724         }
4725       else
4726         iv_ca_delta_free (&act_delta);
4727     }
4728
4729   if (infinite_cost_p (best_cost))
4730     {
4731       for (i = 0; i < use->n_map_members; i++)
4732         {
4733           cp = use->cost_map + i;
4734           cand = cp->cand;
4735           if (!cand)
4736             continue;
4737
4738           /* Already tried this.  */
4739           if (cand->important && cand->iv->base_object == NULL_TREE)
4740             continue;
4741
4742           if (iv_ca_cand_used_p (ivs, cand))
4743             continue;
4744
4745           act_delta = NULL;
4746           iv_ca_set_cp (data, ivs, use, cp);
4747           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4748           iv_ca_set_no_cp (data, ivs, use);
4749           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4750                                        cp, act_delta);
4751
4752           if (compare_costs (act_cost, best_cost) < 0)
4753             {
4754               best_cost = act_cost;
4755
4756               if (best_delta)
4757                 iv_ca_delta_free (&best_delta);
4758               best_delta = act_delta;
4759             }
4760           else
4761             iv_ca_delta_free (&act_delta);
4762         }
4763     }
4764
4765   iv_ca_delta_commit (data, ivs, best_delta, true);
4766   iv_ca_delta_free (&best_delta);
4767
4768   return !infinite_cost_p (best_cost);
4769 }
4770
4771 /* Finds an initial assignment of candidates to uses.  */
4772
4773 static struct iv_ca *
4774 get_initial_solution (struct ivopts_data *data)
4775 {
4776   struct iv_ca *ivs = iv_ca_new (data);
4777   unsigned i;
4778
4779   for (i = 0; i < n_iv_uses (data); i++)
4780     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4781       {
4782         iv_ca_free (&ivs);
4783         return NULL;
4784       }
4785
4786   return ivs;
4787 }
4788
4789 /* Tries to improve set of induction variables IVS.  */
4790
4791 static bool
4792 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4793 {
4794   unsigned i, n_ivs;
4795   comp_cost acost, best_cost = iv_ca_cost (ivs);
4796   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4797   struct iv_cand *cand;
4798
4799   /* Try extending the set of induction variables by one.  */
4800   for (i = 0; i < n_iv_cands (data); i++)
4801     {
4802       cand = iv_cand (data, i);
4803
4804       if (iv_ca_cand_used_p (ivs, cand))
4805         continue;
4806
4807       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4808       if (!act_delta)
4809         continue;
4810
4811       /* If we successfully added the candidate and the set is small enough,
4812          try optimizing it by removing other candidates.  */
4813       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4814         {
4815           iv_ca_delta_commit (data, ivs, act_delta, true);
4816           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4817           iv_ca_delta_commit (data, ivs, act_delta, false);
4818           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4819         }
4820
4821       if (compare_costs (acost, best_cost) < 0)
4822         {
4823           best_cost = acost;
4824           iv_ca_delta_free (&best_delta);
4825           best_delta = act_delta;
4826         }
4827       else
4828         iv_ca_delta_free (&act_delta);
4829     }
4830
4831   if (!best_delta)
4832     {
4833       /* Try removing the candidates from the set instead.  */
4834       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4835
4836       /* Nothing more we can do.  */
4837       if (!best_delta)
4838         return false;
4839     }
4840
4841   iv_ca_delta_commit (data, ivs, best_delta, true);
4842   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4843   iv_ca_delta_free (&best_delta);
4844   return true;
4845 }
4846
4847 /* Attempts to find the optimal set of induction variables.  We do simple
4848    greedy heuristic -- we try to replace at most one candidate in the selected
4849    solution and remove the unused ivs while this improves the cost.  */
4850
4851 static struct iv_ca *
4852 find_optimal_iv_set (struct ivopts_data *data)
4853 {
4854   unsigned i;
4855   struct iv_ca *set;
4856   struct iv_use *use;
4857
4858   /* Get the initial solution.  */
4859   set = get_initial_solution (data);
4860   if (!set)
4861     {
4862       if (dump_file && (dump_flags & TDF_DETAILS))
4863         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4864       return NULL;
4865     }
4866
4867   if (dump_file && (dump_flags & TDF_DETAILS))
4868     {
4869       fprintf (dump_file, "Initial set of candidates:\n");
4870       iv_ca_dump (data, dump_file, set);
4871     }
4872
4873   while (try_improve_iv_set (data, set))
4874     {
4875       if (dump_file && (dump_flags & TDF_DETAILS))
4876         {
4877           fprintf (dump_file, "Improved to:\n");
4878           iv_ca_dump (data, dump_file, set);
4879         }
4880     }
4881
4882   if (dump_file && (dump_flags & TDF_DETAILS))
4883     {
4884       comp_cost cost = iv_ca_cost (set);
4885       fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4886     }
4887
4888   for (i = 0; i < n_iv_uses (data); i++)
4889     {
4890       use = iv_use (data, i);
4891       use->selected = iv_ca_cand_for_use (set, use)->cand;
4892     }
4893
4894   return set;
4895 }
4896
4897 /* Creates a new induction variable corresponding to CAND.  */
4898
4899 static void
4900 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4901 {
4902   block_stmt_iterator incr_pos;
4903   tree base;
4904   bool after = false;
4905
4906   if (!cand->iv)
4907     return;
4908
4909   switch (cand->pos)
4910     {
4911     case IP_NORMAL:
4912       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4913       break;
4914
4915     case IP_END:
4916       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4917       after = true;
4918       break;
4919
4920     case IP_ORIGINAL:
4921       /* Mark that the iv is preserved.  */
4922       name_info (data, cand->var_before)->preserve_biv = true;
4923       name_info (data, cand->var_after)->preserve_biv = true;
4924
4925       /* Rewrite the increment so that it uses var_before directly.  */
4926       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4927
4928       return;
4929     }
4930
4931   gimple_add_tmp_var (cand->var_before);
4932   add_referenced_var (cand->var_before);
4933
4934   base = unshare_expr (cand->iv->base);
4935
4936   create_iv (base, unshare_expr (cand->iv->step),
4937              cand->var_before, data->current_loop,
4938              &incr_pos, after, &cand->var_before, &cand->var_after);
4939 }
4940
4941 /* Creates new induction variables described in SET.  */
4942
4943 static void
4944 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4945 {
4946   unsigned i;
4947   struct iv_cand *cand;
4948   bitmap_iterator bi;
4949
4950   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4951     {
4952       cand = iv_cand (data, i);
4953       create_new_iv (data, cand);
4954     }
4955 }
4956
4957 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4958    is true, remove also the ssa name defined by the statement.  */
4959
4960 static void
4961 remove_statement (tree stmt, bool including_defined_name)
4962 {
4963   if (TREE_CODE (stmt) == PHI_NODE)
4964     {
4965       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4966     }
4967   else
4968     {
4969       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4970
4971       bsi_remove (&bsi, true);
4972       release_defs (stmt);
4973     }
4974 }
4975
4976 /* Rewrites USE (definition of iv used in a nonlinear expression)
4977    using candidate CAND.  */
4978
4979 static void
4980 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4981                             struct iv_use *use, struct iv_cand *cand)
4982 {
4983   tree comp;
4984   tree op, tgt, ass;
4985   block_stmt_iterator bsi;
4986
4987   /* An important special case -- if we are asked to express value of
4988      the original iv by itself, just exit; there is no need to
4989      introduce a new computation (that might also need casting the
4990      variable to unsigned and back).  */
4991   if (cand->pos == IP_ORIGINAL
4992       && cand->incremented_at == use->stmt)
4993     {
4994       tree step, ctype, utype;
4995       enum tree_code incr_code = PLUS_EXPR;
4996
4997       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4998       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4999
5000       step = cand->iv->step;
5001       ctype = TREE_TYPE (step);
5002       utype = TREE_TYPE (cand->var_after);
5003       if (TREE_CODE (step) == NEGATE_EXPR)
5004         {
5005           incr_code = MINUS_EXPR;
5006           step = TREE_OPERAND (step, 0);
5007         }
5008
5009       /* Check whether we may leave the computation unchanged.
5010          This is the case only if it does not rely on other
5011          computations in the loop -- otherwise, the computation
5012          we rely upon may be removed in remove_unused_ivs,
5013          thus leading to ICE.  */
5014       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
5015       if (TREE_CODE (op) == PLUS_EXPR
5016           || TREE_CODE (op) == MINUS_EXPR
5017           || TREE_CODE (op) == POINTER_PLUS_EXPR)
5018         {
5019           if (TREE_OPERAND (op, 0) == cand->var_before)
5020             op = TREE_OPERAND (op, 1);
5021           else if (TREE_CODE (op) != MINUS_EXPR
5022                    && TREE_OPERAND (op, 1) == cand->var_before)
5023             op = TREE_OPERAND (op, 0);
5024           else
5025             op = NULL_TREE;
5026         }
5027       else
5028         op = NULL_TREE;
5029
5030       if (op
5031           && (TREE_CODE (op) == INTEGER_CST
5032               || operand_equal_p (op, step, 0)))
5033         return;
5034
5035       /* Otherwise, add the necessary computations to express
5036          the iv.  */
5037       op = fold_convert (ctype, cand->var_before);
5038       comp = fold_convert (utype,
5039                            build2 (incr_code, ctype, op,
5040                                    unshare_expr (step)));
5041     }
5042   else
5043     {
5044       comp = get_computation (data->current_loop, use, cand);
5045       gcc_assert (comp != NULL_TREE);
5046     }
5047
5048   switch (TREE_CODE (use->stmt))
5049     {
5050     case PHI_NODE:
5051       tgt = PHI_RESULT (use->stmt);
5052
5053       /* If we should keep the biv, do not replace it.  */
5054       if (name_info (data, tgt)->preserve_biv)
5055         return;
5056
5057       bsi = bsi_after_labels (bb_for_stmt (use->stmt));
5058       break;
5059
5060     case GIMPLE_MODIFY_STMT:
5061       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
5062       bsi = bsi_for_stmt (use->stmt);
5063       break;
5064
5065     default:
5066       gcc_unreachable ();
5067     }
5068
5069   op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5070                                  true, BSI_SAME_STMT);
5071
5072   if (TREE_CODE (use->stmt) == PHI_NODE)
5073     {
5074       ass = build_gimple_modify_stmt (tgt, op);
5075       bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
5076       remove_statement (use->stmt, false);
5077       SSA_NAME_DEF_STMT (tgt) = ass;
5078     }
5079   else
5080     GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
5081 }
5082
5083 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5084    for_each_index.  */
5085
5086 static bool
5087 idx_remove_ssa_names (tree base, tree *idx,
5088                       void *data ATTRIBUTE_UNUSED)
5089 {
5090   tree *op;
5091
5092   if (TREE_CODE (*idx) == SSA_NAME)
5093     *idx = SSA_NAME_VAR (*idx);
5094
5095   if (TREE_CODE (base) == ARRAY_REF)
5096     {
5097       op = &TREE_OPERAND (base, 2);
5098       if (*op
5099           && TREE_CODE (*op) == SSA_NAME)
5100         *op = SSA_NAME_VAR (*op);
5101       op = &TREE_OPERAND (base, 3);
5102       if (*op
5103           && TREE_CODE (*op) == SSA_NAME)
5104         *op = SSA_NAME_VAR (*op);
5105     }
5106
5107   return true;
5108 }
5109
5110 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5111
5112 static tree
5113 unshare_and_remove_ssa_names (tree ref)
5114 {
5115   ref = unshare_expr (ref);
5116   for_each_index (&ref, idx_remove_ssa_names, NULL);
5117
5118   return ref;
5119 }
5120
5121 /* Extract the alias analysis info for the memory reference REF.  There are
5122    several ways how this information may be stored and what precisely is
5123    its semantics depending on the type of the reference, but there always is
5124    somewhere hidden one _DECL node that is used to determine the set of
5125    virtual operands for the reference.  The code below deciphers this jungle
5126    and extracts this single useful piece of information.  */
5127
5128 static tree
5129 get_ref_tag (tree ref, tree orig)
5130 {
5131   tree var = get_base_address (ref);
5132   tree aref = NULL_TREE, tag, sv;
5133   HOST_WIDE_INT offset, size, maxsize;
5134
5135   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5136     {
5137       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5138       if (ref)
5139         break;
5140     }
5141
5142   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5143     return aref;
5144
5145   if (!var)
5146     return NULL_TREE;
5147
5148   if (TREE_CODE (var) == INDIRECT_REF)
5149     {
5150       /* If the base is a dereference of a pointer, first check its name memory
5151          tag.  If it does not have one, use its symbol memory tag.  */
5152       var = TREE_OPERAND (var, 0);
5153       if (TREE_CODE (var) != SSA_NAME)
5154         return NULL_TREE;
5155
5156       if (SSA_NAME_PTR_INFO (var))
5157         {
5158           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5159           if (tag)
5160             return tag;
5161         }
5162
5163       var = SSA_NAME_VAR (var);
5164       tag = symbol_mem_tag (var);
5165       gcc_assert (tag != NULL_TREE);
5166       return tag;
5167     }
5168   else
5169     {
5170       if (!DECL_P (var))
5171         return NULL_TREE;
5172
5173       tag = symbol_mem_tag (var);
5174       if (tag)
5175         return tag;
5176
5177       return var;
5178     }
5179 }
5180
5181 /* Copies the reference information from OLD_REF to NEW_REF.  */
5182
5183 static void
5184 copy_ref_info (tree new_ref, tree old_ref)
5185 {
5186   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5187     copy_mem_ref_info (new_ref, old_ref);
5188   else
5189     {
5190       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5191       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5192     }
5193 }
5194
5195 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5196
5197 static void
5198 rewrite_use_address (struct ivopts_data *data,
5199                      struct iv_use *use, struct iv_cand *cand)
5200 {
5201   aff_tree aff;
5202   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5203   tree ref;
5204   bool ok;
5205
5206   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5207   gcc_assert (ok);
5208   unshare_aff_combination (&aff);
5209
5210   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5211   copy_ref_info (ref, *use->op_p);
5212   *use->op_p = ref;
5213 }
5214
5215 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5216    candidate CAND.  */
5217
5218 static void
5219 rewrite_use_compare (struct ivopts_data *data,
5220                      struct iv_use *use, struct iv_cand *cand)
5221 {
5222   tree comp, *var_p, op, bound;
5223   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5224   enum tree_code compare;
5225   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5226   bool ok;
5227
5228   bound = cp->value;
5229   if (bound)
5230     {
5231       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5232       tree var_type = TREE_TYPE (var);
5233
5234       compare = iv_elimination_compare (data, use);
5235       bound = unshare_expr (fold_convert (var_type, bound));
5236       op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
5237                                      true, BSI_SAME_STMT);
5238
5239       *use->op_p = build2 (compare, boolean_type_node, var, op);
5240       return;
5241     }
5242
5243   /* The induction variable elimination failed; just express the original
5244      giv.  */
5245   comp = get_computation (data->current_loop, use, cand);
5246   gcc_assert (comp != NULL_TREE);
5247
5248   ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5249   gcc_assert (ok);
5250
5251   *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5252                                      true, BSI_SAME_STMT);
5253 }
5254
5255 /* Rewrites USE using candidate CAND.  */
5256
5257 static void
5258 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5259 {
5260   push_stmt_changes (&use->stmt);
5261
5262   switch (use->type)
5263     {
5264       case USE_NONLINEAR_EXPR:
5265         rewrite_use_nonlinear_expr (data, use, cand);
5266         break;
5267
5268       case USE_ADDRESS:
5269         rewrite_use_address (data, use, cand);
5270         break;
5271
5272       case USE_COMPARE:
5273         rewrite_use_compare (data, use, cand);
5274         break;
5275
5276       default:
5277         gcc_unreachable ();
5278     }
5279
5280   pop_stmt_changes (&use->stmt);
5281 }
5282
5283 /* Rewrite the uses using the selected induction variables.  */
5284
5285 static void
5286 rewrite_uses (struct ivopts_data *data)
5287 {
5288   unsigned i;
5289   struct iv_cand *cand;
5290   struct iv_use *use;
5291
5292   for (i = 0; i < n_iv_uses (data); i++)
5293     {
5294       use = iv_use (data, i);
5295       cand = use->selected;
5296       gcc_assert (cand);
5297
5298       rewrite_use (data, use, cand);
5299     }
5300 }
5301
5302 /* Removes the ivs that are not used after rewriting.  */
5303
5304 static void
5305 remove_unused_ivs (struct ivopts_data *data)
5306 {
5307   unsigned j;
5308   bitmap_iterator bi;
5309
5310   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5311     {
5312       struct version_info *info;
5313
5314       info = ver_info (data, j);
5315       if (info->iv
5316           && !integer_zerop (info->iv->step)
5317           && !info->inv_id
5318           && !info->iv->have_use_for
5319           && !info->preserve_biv)
5320         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5321     }
5322 }
5323
5324 /* Frees data allocated by the optimization of a single loop.  */
5325
5326 static void
5327 free_loop_data (struct ivopts_data *data)
5328 {
5329   unsigned i, j;
5330   bitmap_iterator bi;
5331   tree obj;
5332
5333   if (data->niters)
5334     {
5335       pointer_map_destroy (data->niters);
5336       data->niters = NULL;
5337     }
5338
5339   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5340     {
5341       struct version_info *info;
5342
5343       info = ver_info (data, i);
5344       if (info->iv)
5345         free (info->iv);
5346       info->iv = NULL;
5347       info->has_nonlin_use = false;
5348       info->preserve_biv = false;
5349       info->inv_id = 0;
5350     }
5351   bitmap_clear (data->relevant);
5352   bitmap_clear (data->important_candidates);
5353
5354   for (i = 0; i < n_iv_uses (data); i++)
5355     {
5356       struct iv_use *use = iv_use (data, i);
5357
5358       free (use->iv);
5359       BITMAP_FREE (use->related_cands);
5360       for (j = 0; j < use->n_map_members; j++)
5361         if (use->cost_map[j].depends_on)
5362           BITMAP_FREE (use->cost_map[j].depends_on);
5363       free (use->cost_map);
5364       free (use);
5365     }
5366   VEC_truncate (iv_use_p, data->iv_uses, 0);
5367
5368   for (i = 0; i < n_iv_cands (data); i++)
5369     {
5370       struct iv_cand *cand = iv_cand (data, i);
5371
5372       if (cand->iv)
5373         free (cand->iv);
5374       if (cand->depends_on)
5375         BITMAP_FREE (cand->depends_on);
5376       free (cand);
5377     }
5378   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5379
5380   if (data->version_info_size < num_ssa_names)
5381     {
5382       data->version_info_size = 2 * num_ssa_names;
5383       free (data->version_info);
5384       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5385     }
5386
5387   data->max_inv_id = 0;
5388
5389   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5390     SET_DECL_RTL (obj, NULL_RTX);
5391
5392   VEC_truncate (tree, decl_rtl_to_reset, 0);
5393 }
5394
5395 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5396    loop tree.  */
5397
5398 static void
5399 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5400 {
5401   free_loop_data (data);
5402   free (data->version_info);
5403   BITMAP_FREE (data->relevant);
5404   BITMAP_FREE (data->important_candidates);
5405
5406   VEC_free (tree, heap, decl_rtl_to_reset);
5407   VEC_free (iv_use_p, heap, data->iv_uses);
5408   VEC_free (iv_cand_p, heap, data->iv_candidates);
5409 }
5410
5411 /* Optimizes the LOOP.  Returns true if anything changed.  */
5412
5413 static bool
5414 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5415 {
5416   bool changed = false;
5417   struct iv_ca *iv_ca;
5418   edge exit;
5419
5420   gcc_assert (!data->niters);
5421   data->current_loop = loop;
5422
5423   if (dump_file && (dump_flags & TDF_DETAILS))
5424     {
5425       fprintf (dump_file, "Processing loop %d\n", loop->num);
5426
5427       exit = single_dom_exit (loop);
5428       if (exit)
5429         {
5430           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5431                    exit->src->index, exit->dest->index);
5432           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5433           fprintf (dump_file, "\n");
5434         }
5435
5436       fprintf (dump_file, "\n");
5437     }
5438
5439   /* For each ssa name determines whether it behaves as an induction variable
5440      in some loop.  */
5441   if (!find_induction_variables (data))
5442     goto finish;
5443
5444   /* Finds interesting uses (item 1).  */
5445   find_interesting_uses (data);
5446   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5447     goto finish;
5448
5449   /* Finds candidates for the induction variables (item 2).  */
5450   find_iv_candidates (data);
5451
5452   /* Calculates the costs (item 3, part 1).  */
5453   determine_use_iv_costs (data);
5454   determine_iv_costs (data);
5455   determine_set_costs (data);
5456
5457   /* Find the optimal set of induction variables (item 3, part 2).  */
5458   iv_ca = find_optimal_iv_set (data);
5459   if (!iv_ca)
5460     goto finish;
5461   changed = true;
5462
5463   /* Create the new induction variables (item 4, part 1).  */
5464   create_new_ivs (data, iv_ca);
5465   iv_ca_free (&iv_ca);
5466
5467   /* Rewrite the uses (item 4, part 2).  */
5468   rewrite_uses (data);
5469
5470   /* Remove the ivs that are unused after rewriting.  */
5471   remove_unused_ivs (data);
5472
5473   /* We have changed the structure of induction variables; it might happen
5474      that definitions in the scev database refer to some of them that were
5475      eliminated.  */
5476   scev_reset ();
5477
5478 finish:
5479   free_loop_data (data);
5480
5481   return changed;
5482 }
5483
5484 /* Main entry point.  Optimizes induction variables in loops.  */
5485
5486 void
5487 tree_ssa_iv_optimize (void)
5488 {
5489   struct loop *loop;
5490   struct ivopts_data data;
5491   loop_iterator li;
5492
5493   tree_ssa_iv_optimize_init (&data);
5494
5495   /* Optimize the loops starting with the innermost ones.  */
5496   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5497     {
5498       if (dump_file && (dump_flags & TDF_DETAILS))
5499         flow_loop_dump (loop, dump_file, NULL, 1);
5500
5501       tree_ssa_iv_optimize_loop (&data, loop);
5502     }
5503
5504   tree_ssa_iv_optimize_finalize (&data);
5505 }