/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
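/* An illustrative example (added for exposition; not from the original
   sources): in a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   the address of a[i] hides the multiplication i * sizeof (*a).  Strength
   reduction may replace it by a pointer iv p that starts at a and is
   incremented by sizeof (*a) each iteration, and iv elimination may then
   rewrite the exit test in terms of p, making i dead:

     for (p = a; p < a + n; p++)
       sum += *p;

   Whether this rewrite actually pays off is decided by the cost model
   described in point 3) above.  */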
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling
   instead of this.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object that the induction variable points to.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};
/* Cost of a computation.  */

typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */

struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};
/* Use of an induction variable.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, we always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit,
                struct tree_niter_desc **desc_p)
{
  struct tree_niter_desc* desc = NULL;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (number_of_iterations_exit (data->current_loop,
                                     exit, desc, true)
          && integer_zerop (desc->may_be_zero)
          && !contains_abnormal_ssa_name_p (desc->niter))
        niter = desc->niter;
      else
        niter = NULL_TREE;

      desc->niter = niter;
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    niter = ((struct tree_niter_desc *) *slot)->niter;

  if (desc_p)
    *desc_p = (struct tree_niter_desc *) *slot;
  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit, NULL);
}
/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns the memory object that EXPR points to.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = fold_convert (sizetype, step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs = NULL_TREE;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            true))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
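/* Illustrative examples (added for exposition; not from the original
   sources): with TOP = (n + 1) * 4 and BOT = n + 1, the MULT_EXPR case
   recurses on equal operands and *MUL is set to 4.  With TOP = 13 and
   BOT = 4, the INTEGER_CST case performs a FLOOR_DIV that leaves
   remainder 1, so false is returned.  */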
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
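/* Illustrative examples (added for exposition; not from the original
   sources): for EXPR = p + 24, strip_offset returns p and sets *OFFSET
   to 24; for EXPR = &a[4] with 4-byte array elements, it strips the
   ARRAY_REF completely, returning the address of the array with *OFFSET
   set to 16.  */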
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at
          || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
              && cand->ainc_use != use))
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (operand_equal_p (base, cand->iv->base, 0)
          && operand_equal_p (step, cand->iv->step, 0)
          && (TYPE_PRECISION (TREE_TYPE (base))
              == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
        break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
        cand->ainc_use = use;
      else
        cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
                        bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
        {
          new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
          code = POINTER_PLUS_EXPR;
        }
      else
        new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
                       use->stmt);
    }
  if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
                       use->stmt);
    }
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}
/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
        add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
2566 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2567 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2568 we allocate a simple list for every use. */
2571 alloc_use_cost_map (struct ivopts_data *data)
2573 unsigned i, size, s, j;
2575 for (i = 0; i < n_iv_uses (data); i++)
2577 struct iv_use *use = iv_use (data, i);
2580 if (data->consider_all_candidates)
2581 size = n_iv_cands (data);
2585 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2590 /* Round up to the nearest power of two, so that taking the modulo by it is fast. */
2591 for (size = 1; size < s; size <<= 1)
2595 use->n_map_members = size;
2596 use->cost_map = XCNEWVEC (struct cost_pair, size);
2600 /* Returns a description of the computation cost of an expression whose
2601 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2604 new_cost (unsigned runtime, unsigned complexity)
2608 cost.cost = runtime;
2609 cost.complexity = complexity;
2614 /* Adds costs COST1 and COST2. */
2617 add_costs (comp_cost cost1, comp_cost cost2)
2619 cost1.cost += cost2.cost;
2620 cost1.complexity += cost2.complexity;
2624 /* Subtracts cost COST2 from cost COST1. */
2627 sub_costs (comp_cost cost1, comp_cost cost2)
2629 cost1.cost -= cost2.cost;
2630 cost1.complexity -= cost2.complexity;
2635 /* Returns a negative number if COST1 < COST2, a positive number if
2636 COST1 > COST2, and 0 if COST1 = COST2. */
2639 compare_costs (comp_cost cost1, comp_cost cost2)
2641 if (cost1.cost == cost2.cost)
2642 return cost1.complexity - cost2.complexity;
2644 return cost1.cost - cost2.cost;
2647 /* Returns true if COST is infinite. */
2650 infinite_cost_p (comp_cost cost)
2652 return cost.cost == INFTY;
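/* Illustrative sketch (hypothetical helper, not part of the pass): the
   comparison above orders costs by the runtime COST field first and uses
   COMPLEXITY only to break ties, so {cost 4, complexity 9} is still
   considered cheaper than {cost 5, complexity 0}.  */

static int ATTRIBUTE_UNUSED
comp_cost_ordering_example (void)
{
  comp_cost a = new_cost (4, 9);
  comp_cost b = new_cost (5, 0);

  /* Negative: A is cheaper despite its much higher complexity.  */
  return compare_costs (a, b);
}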
2655 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2656 on invariants DEPENDS_ON and that the value used in expressing it
2657 is VALUE. */
2660 set_use_iv_cost (struct ivopts_data *data,
2661 struct iv_use *use, struct iv_cand *cand,
2662 comp_cost cost, bitmap depends_on, tree value,
2667 if (infinite_cost_p (cost))
2669 BITMAP_FREE (depends_on);
2673 if (data->consider_all_candidates)
2675 use->cost_map[cand->id].cand = cand;
2676 use->cost_map[cand->id].cost = cost;
2677 use->cost_map[cand->id].depends_on = depends_on;
2678 use->cost_map[cand->id].value = value;
2679 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2683 /* n_map_members is a power of two, so this computes modulo. */
2684 s = cand->id & (use->n_map_members - 1);
2685 for (i = s; i < use->n_map_members; i++)
2686 if (!use->cost_map[i].cand)
2688 for (i = 0; i < s; i++)
2689 if (!use->cost_map[i].cand)
2695 use->cost_map[i].cand = cand;
2696 use->cost_map[i].cost = cost;
2697 use->cost_map[i].depends_on = depends_on;
2698 use->cost_map[i].value = value;
2699 use->cost_map[i].inv_expr_id = inv_expr_id;
2702 /* Gets cost of (USE, CANDIDATE) pair. */
2704 static struct cost_pair *
2705 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2706 struct iv_cand *cand)
2709 struct cost_pair *ret;
2714 if (data->consider_all_candidates)
2716 ret = use->cost_map + cand->id;
2723 /* n_map_members is a power of two, so this computes modulo. */
2724 s = cand->id & (use->n_map_members - 1);
2725 for (i = s; i < use->n_map_members; i++)
2726 if (use->cost_map[i].cand == cand)
2727 return use->cost_map + i;
2729 for (i = 0; i < s; i++)
2730 if (use->cost_map[i].cand == cand)
2731 return use->cost_map + i;
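/* Minimal sketch of the lookup above (hypothetical helper, not part of
   the pass): because n_map_members was rounded up to a power of two in
   alloc_use_cost_map, ID & (SIZE - 1) is ID mod SIZE, and the probe
   walks from that home slot to the end, then wraps to the beginning.  */

static int ATTRIBUTE_UNUSED
open_addressing_probe_example (const int *slot_ids, unsigned size, int id)
{
  unsigned s = id & (size - 1), i;

  for (i = s; i < size; i++)
    if (slot_ids[i] == id)
      return (int) i;
  for (i = 0; i < s; i++)
    if (slot_ids[i] == id)
      return (int) i;

  return -1;	/* Not present in the table.  */
}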
2736 /* Returns an estimate of the cost of computing SEQ. */
2739 seq_cost (rtx seq, bool speed)
2744 for (; seq; seq = NEXT_INSN (seq))
2746 set = single_set (seq);
2748 cost += rtx_cost (SET_SRC (set), SET, speed);
2756 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2758 produce_memory_decl_rtl (tree obj, int *regno)
2760 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2761 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2765 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2767 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2768 x = gen_rtx_SYMBOL_REF (address_mode, name);
2769 SET_SYMBOL_REF_DECL (x, obj);
2770 x = gen_rtx_MEM (DECL_MODE (obj), x);
2771 set_mem_addr_space (x, as);
2772 targetm.encode_section_info (obj, x, true);
2776 x = gen_raw_REG (address_mode, (*regno)++);
2777 x = gen_rtx_MEM (DECL_MODE (obj), x);
2778 set_mem_addr_space (x, as);
2784 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2785 walk_tree. DATA contains the actual fake register number. */
2788 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2790 tree obj = NULL_TREE;
2792 int *regno = (int *) data;
2794 switch (TREE_CODE (*expr_p))
2797 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2798 handled_component_p (*expr_p);
2799 expr_p = &TREE_OPERAND (*expr_p, 0))
2802 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2803 x = produce_memory_decl_rtl (obj, regno);
2808 obj = SSA_NAME_VAR (*expr_p);
2809 if (!DECL_RTL_SET_P (obj))
2810 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2819 if (DECL_RTL_SET_P (obj))
2822 if (DECL_MODE (obj) == BLKmode)
2823 x = produce_memory_decl_rtl (obj, regno);
2825 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2835 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2836 SET_DECL_RTL (obj, x);
2842 /* Determines cost of the computation of EXPR. */
2845 computation_cost (tree expr, bool speed)
2848 tree type = TREE_TYPE (expr);
2850 /* Avoid using hard regs in ways which may be unsupported. */
2851 int regno = LAST_VIRTUAL_REGISTER + 1;
2852 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2853 enum node_frequency real_frequency = node->frequency;
2855 node->frequency = NODE_FREQUENCY_NORMAL;
2856 crtl->maybe_hot_insn_p = speed;
2857 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2859 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2862 default_rtl_profile ();
2863 node->frequency = real_frequency;
2865 cost = seq_cost (seq, speed);
2867 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2868 TYPE_ADDR_SPACE (type), speed);
2869 else if (!REG_P (rslt))
2870 cost += rtx_cost (rslt, SET, speed);
2875 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2878 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2880 if (stmt_after_increment (loop, cand, stmt))
2881 return cand->var_after;
2883 return cand->var_before;
2886 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2887 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2890 tree_int_cst_sign_bit (const_tree t)
2892 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2893 unsigned HOST_WIDE_INT w;
2895 if (bitno < HOST_BITS_PER_WIDE_INT)
2896 w = TREE_INT_CST_LOW (t);
2899 w = TREE_INT_CST_HIGH (t);
2900 bitno -= HOST_BITS_PER_WIDE_INT;
2903 return (w >> bitno) & 1;
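/* Worked example of the word selection above (hypothetical helper,
   mirroring tree_int_cst_sign_bit on plain host integers): for a 16-bit
   precision the sign bit is bit 15 of the low word, so the value 0x8000
   is negative in that precision even though the host word is wider.  */

static unsigned ATTRIBUTE_UNUSED
sign_bit_example (unsigned HOST_WIDE_INT low, unsigned HOST_WIDE_INT high,
		  unsigned precision)
{
  unsigned bitno = precision - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = low;			/* The bit lives in the low word.  */
  else
    {
      w = high;			/* The bit lives in the high word.  */
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;	/* sign_bit_example (0x8000, 0, 16) == 1.  */
}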
2906 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2907 same precision that is at least as wide as the precision of TYPE, stores
2908 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2909 type of A. */
2912 determine_common_wider_type (tree *a, tree *b)
2914 tree wider_type = NULL;
2916 tree atype = TREE_TYPE (*a);
2918 if (CONVERT_EXPR_P (*a))
2920 suba = TREE_OPERAND (*a, 0);
2921 wider_type = TREE_TYPE (suba);
2922 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2928 if (CONVERT_EXPR_P (*b))
2930 subb = TREE_OPERAND (*b, 0);
2931 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2942 /* Determines the expression by which USE is expressed from induction variable
2943 CAND at statement AT in LOOP. The expression is stored in a decomposed
2944 form into AFF. Returns false if USE cannot be expressed using CAND. */
2947 get_computation_aff (struct loop *loop,
2948 struct iv_use *use, struct iv_cand *cand, gimple at,
2949 struct affine_tree_combination *aff)
2951 tree ubase = use->iv->base;
2952 tree ustep = use->iv->step;
2953 tree cbase = cand->iv->base;
2954 tree cstep = cand->iv->step, cstep_common;
2955 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2956 tree common_type, var;
2958 aff_tree cbase_aff, var_aff;
2961 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2963 /* We do not have a precision to express the values of use. */
2967 var = var_at_stmt (loop, cand, at);
2968 uutype = unsigned_type_for (utype);
2970 /* If the conversion is not a noop, perform it. */
2971 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2973 cstep = fold_convert (uutype, cstep);
2974 cbase = fold_convert (uutype, cbase);
2975 var = fold_convert (uutype, var);
2978 if (!constant_multiple_of (ustep, cstep, &rat))
2981 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2982 type, we achieve better folding by computing their difference in this
2983 wider type, and casting the result to UUTYPE. We do not need to worry about
2984 overflows, as all the arithmetic will in the end be performed in UUTYPE
2985 anyway. */
2986 common_type = determine_common_wider_type (&ubase, &cbase);
2988 /* use = ubase - ratio * cbase + ratio * var. */
2989 tree_to_aff_combination (ubase, common_type, aff);
2990 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2991 tree_to_aff_combination (var, uutype, &var_aff);
2993 /* We need to shift the value if we are after the increment. */
2994 if (stmt_after_increment (loop, cand, at))
2998 if (common_type != uutype)
2999 cstep_common = fold_convert (common_type, cstep);
3001 cstep_common = cstep;
3003 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3004 aff_combination_add (&cbase_aff, &cstep_aff);
3007 aff_combination_scale (&cbase_aff, double_int_neg (rat));
3008 aff_combination_add (aff, &cbase_aff);
3009 if (common_type != uutype)
3010 aff_combination_convert (aff, uutype);
3012 aff_combination_scale (&var_aff, rat);
3013 aff_combination_add (aff, &var_aff);
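/* Concrete instance of the rewrite above (hypothetical numbers): with a
   use iv {ubase = 16, ustep = 8} and a candidate iv {cbase = 4, cstep = 2},
   constant_multiple_of yields ratio = 4, and the use value in iteration i,
   16 + 8 * i, is recovered from the candidate value var = 4 + 2 * i as
   ubase - ratio * cbase + ratio * var.  */

static long ATTRIBUTE_UNUSED
computation_rewrite_example (long var)
{
  const long ubase = 16, cbase = 4, ratio = 4;

  return ubase - ratio * cbase + ratio * var;	/* == 16 + 8 * i.  */
}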
3018 /* Determines the expression by which USE is expressed from induction variable
3019 CAND at statement AT in LOOP. The computation is unshared. */
3022 get_computation_at (struct loop *loop,
3023 struct iv_use *use, struct iv_cand *cand, gimple at)
3026 tree type = TREE_TYPE (use->iv->base);
3028 if (!get_computation_aff (loop, use, cand, at, &aff))
3030 unshare_aff_combination (&aff);
3031 return fold_convert (type, aff_combination_to_tree (&aff));
3034 /* Determines the expression by which USE is expressed from induction variable
3035 CAND in LOOP. The computation is unshared. */
3038 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3040 return get_computation_at (loop, use, cand, use->stmt);
3043 /* Adjust the cost COST for being in loop setup rather than loop body.
3044 If we're optimizing for space, the loop setup overhead is constant;
3045 if we're optimizing for speed, amortize it over the per-iteration cost. */
3047 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3051 else if (optimize_loop_for_speed_p (data->current_loop))
3052 return cost / avg_loop_niter (data->current_loop);
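/* Sketch of the amortization above (hypothetical numbers): a setup cost
   of 20 with an average trip count of 5 is charged as 20 / 5 = 4 when
   optimizing for speed, but keeps its full weight when optimizing for
   size, where the setup code is emitted exactly once either way.  */

static unsigned ATTRIBUTE_UNUSED
setup_cost_example (unsigned cost, unsigned avg_niter, bool speed_p)
{
  return speed_p ? cost / avg_niter : cost;
}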
3057 /* Returns cost of addition in MODE. */
3060 add_cost (enum machine_mode mode, bool speed)
3062 static unsigned costs[NUM_MACHINE_MODES];
3070 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3071 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3072 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3077 cost = seq_cost (seq, speed);
3083 if (dump_file && (dump_flags & TDF_DETAILS))
3084 fprintf (dump_file, "Addition in %s costs %d\n",
3085 GET_MODE_NAME (mode), cost);
3089 /* Entry in a hashtable of already known costs for multiplication. */
3092 HOST_WIDE_INT cst; /* The constant to multiply by. */
3093 enum machine_mode mode; /* In mode. */
3094 unsigned cost; /* The cost. */
3097 /* Computes the hash value for ENTRY. */
3100 mbc_entry_hash (const void *entry)
3102 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3104 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3107 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3110 mbc_entry_eq (const void *entry1, const void *entry2)
3112 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3113 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3115 return (e1->mode == e2->mode
3116 && e1->cst == e2->cst);
3119 /* Returns cost of multiplication by constant CST in MODE. */
3122 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3124 static htab_t costs;
3125 struct mbc_entry **cached, act;
3130 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3134 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3136 return (*cached)->cost;
3138 *cached = XNEW (struct mbc_entry);
3139 (*cached)->mode = mode;
3140 (*cached)->cst = cst;
3143 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3144 gen_int_mode (cst, mode), NULL_RTX, 0);
3148 cost = seq_cost (seq, speed);
3150 if (dump_file && (dump_flags & TDF_DETAILS))
3151 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3152 (int) cst, GET_MODE_NAME (mode), cost);
3154 (*cached)->cost = cost;
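/* Minimal sketch of the memoization pattern above (hypothetical toy
   cache with a stand-in computation, not the real hash table): query
   the cache first, and only on a miss do the expensive work, storing
   the result for subsequent calls.  */

static unsigned ATTRIBUTE_UNUSED
memoized_cost_example (HOST_WIDE_INT cst)
{
  static HOST_WIDE_INT cached_cst;
  static unsigned cached_cost;
  static bool cache_valid = false;

  if (cache_valid && cached_cst == cst)
    return cached_cost;		/* Hit: skip the recomputation.  */

  cached_cst = cst;
  cached_cost = (unsigned) ((cst % 877 + 877) % 877);	/* Stand-in work.  */
  cache_valid = true;
  return cached_cost;
}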
3159 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3160 validity for a memory reference accessing memory of mode MODE in
3161 address space AS. */
3163 DEF_VEC_P (sbitmap);
3164 DEF_VEC_ALLOC_P (sbitmap, heap);
3167 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3170 #define MAX_RATIO 128
3171 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3172 static VEC (sbitmap, heap) *valid_mult_list;
3175 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3176 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3178 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3181 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3182 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3186 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3187 sbitmap_zero (valid_mult);
3188 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3189 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3191 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3192 if (memory_address_addr_space_p (mode, addr, as))
3193 SET_BIT (valid_mult, i + MAX_RATIO);
3196 if (dump_file && (dump_flags & TDF_DETAILS))
3198 fprintf (dump_file, " allowed multipliers:");
3199 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3200 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3201 fprintf (dump_file, " %d", (int) i);
3202 fprintf (dump_file, "\n");
3203 fprintf (dump_file, "\n");
3206 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3209 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3212 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
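/* Sketch of the caching scheme above (hypothetical helper): ratios in
   [-MAX_RATIO, MAX_RATIO] map to bit positions 0 .. 2 * MAX_RATIO in
   the per-(mode, address space) sbitmap, so a query is one bit test.  */

static int ATTRIBUTE_UNUSED
ratio_bit_position_example (HOST_WIDE_INT ratio)
{
  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return -1;				/* Outside the cached window.  */

  return (int) (ratio + MAX_RATIO);	/* Bit index in the sbitmap.  */
}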
3215 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3216 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3217 variable is omitted. Compute the cost for a memory reference that accesses
3218 a memory location of mode MEM_MODE in address space AS.
3220 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3221 size of MEM_MODE / RATIO) is available. To make this determination, we
3222 look at the size of the increment to be made, which is given in CSTEP.
3223 CSTEP may be zero if the step is unknown.
3224 STMT_AFTER_INC is true iff the statement we're looking at is after the
3225 increment of the original biv.
3227 TODO -- there must be some better way. This all is quite crude. */
3231 HOST_WIDE_INT min_offset, max_offset;
3232 unsigned costs[2][2][2][2];
3233 } *address_cost_data;
3235 DEF_VEC_P (address_cost_data);
3236 DEF_VEC_ALLOC_P (address_cost_data, heap);
3239 get_address_cost (bool symbol_present, bool var_present,
3240 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3241 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3242 addr_space_t as, bool speed,
3243 bool stmt_after_inc, bool *may_autoinc)
3245 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3246 static VEC(address_cost_data, heap) *address_cost_data_list;
3247 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3248 address_cost_data data;
3249 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3250 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3251 unsigned cost, acost, complexity;
3252 bool offset_p, ratio_p, autoinc;
3253 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3254 unsigned HOST_WIDE_INT mask;
3257 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3258 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3261 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3265 HOST_WIDE_INT rat, off = 0;
3266 int old_cse_not_expected, width;
3267 unsigned sym_p, var_p, off_p, rat_p, add_c;
3268 rtx seq, addr, base;
3271 data = (address_cost_data) xcalloc (1, sizeof (*data));
3273 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3275 width = GET_MODE_BITSIZE (address_mode) - 1;
3276 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3277 width = HOST_BITS_PER_WIDE_INT - 1;
3278 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3280 for (i = width; i >= 0; i--)
3282 off = -((HOST_WIDE_INT) 1 << i);
3283 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3284 if (memory_address_addr_space_p (mem_mode, addr, as))
3287 data->min_offset = (i == -1? 0 : off);
3289 for (i = width; i >= 0; i--)
3291 off = ((HOST_WIDE_INT) 1 << i) - 1;
3292 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3293 if (memory_address_addr_space_p (mem_mode, addr, as))
3298 data->max_offset = off;
3300 if (dump_file && (dump_flags & TDF_DETAILS))
3302 fprintf (dump_file, "get_address_cost:\n");
3303 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3304 GET_MODE_NAME (mem_mode),
3306 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3307 GET_MODE_NAME (mem_mode),
3312 for (i = 2; i <= MAX_RATIO; i++)
3313 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3319 /* Compute the cost of various addressing modes. */
3321 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3322 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3324 if (HAVE_PRE_DECREMENT)
3326 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3327 has_predec[mem_mode]
3328 = memory_address_addr_space_p (mem_mode, addr, as);
3330 if (HAVE_POST_DECREMENT)
3332 addr = gen_rtx_POST_DEC (address_mode, reg0);
3333 has_postdec[mem_mode]
3334 = memory_address_addr_space_p (mem_mode, addr, as);
3336 if (HAVE_PRE_INCREMENT)
3338 addr = gen_rtx_PRE_INC (address_mode, reg0);
3339 has_preinc[mem_mode]
3340 = memory_address_addr_space_p (mem_mode, addr, as);
3342 if (HAVE_POST_INCREMENT)
3344 addr = gen_rtx_POST_INC (address_mode, reg0);
3345 has_postinc[mem_mode]
3346 = memory_address_addr_space_p (mem_mode, addr, as);
3348 for (i = 0; i < 16; i++)
3351 var_p = (i >> 1) & 1;
3352 off_p = (i >> 2) & 1;
3353 rat_p = (i >> 3) & 1;
3357 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3358 gen_int_mode (rat, address_mode));
3361 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3365 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3366 /* ??? We can run into trouble with some backends by presenting
3367 it with symbols which haven't been properly passed through
3368 targetm.encode_section_info. By setting the local bit, we
3369 enhance the probability of things working. */
3370 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3373 base = gen_rtx_fmt_e (CONST, address_mode,
3375 (PLUS, address_mode, base,
3376 gen_int_mode (off, address_mode)));
3379 base = gen_int_mode (off, address_mode);
3384 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3387 /* To avoid splitting addressing modes, pretend that no cse will
3388 follow. */
3389 old_cse_not_expected = cse_not_expected;
3390 cse_not_expected = true;
3391 addr = memory_address_addr_space (mem_mode, addr, as);
3392 cse_not_expected = old_cse_not_expected;
3396 acost = seq_cost (seq, speed);
3397 acost += address_cost (addr, mem_mode, as, speed);
3401 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3404 /* On some targets, it is quite expensive to load a symbol into a register,
3405 which makes addresses that contain symbols look much more expensive.
3406 However, the symbol will have to be loaded in any case before the
3407 loop (and quite likely we have it in a register already), so it does not
3408 make much sense to penalize them too heavily. So make some final
3409 tweaks for the SYMBOL_PRESENT modes:
3411 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3412 var is cheaper, use this mode with a small penalty.
3413 If VAR_PRESENT is true, try whether the mode with
3414 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3415 if this is the case, use it. */
3416 add_c = add_cost (address_mode, speed);
3417 for (i = 0; i < 8; i++)
3420 off_p = (i >> 1) & 1;
3421 rat_p = (i >> 2) & 1;
3423 acost = data->costs[0][1][off_p][rat_p] + 1;
3427 if (acost < data->costs[1][var_p][off_p][rat_p])
3428 data->costs[1][var_p][off_p][rat_p] = acost;
3431 if (dump_file && (dump_flags & TDF_DETAILS))
3433 fprintf (dump_file, "Address costs:\n");
3435 for (i = 0; i < 16; i++)
3438 var_p = (i >> 1) & 1;
3439 off_p = (i >> 2) & 1;
3440 rat_p = (i >> 3) & 1;
3442 fprintf (dump_file, " ");
3444 fprintf (dump_file, "sym + ");
3446 fprintf (dump_file, "var + ");
3448 fprintf (dump_file, "cst + ");
3450 fprintf (dump_file, "rat * ");
3452 acost = data->costs[sym_p][var_p][off_p][rat_p];
3453 fprintf (dump_file, "index costs %d\n", acost);
3455 if (has_predec[mem_mode] || has_postdec[mem_mode]
3456 || has_preinc[mem_mode] || has_postinc[mem_mode])
3457 fprintf (dump_file, " May include autoinc/dec\n");
3458 fprintf (dump_file, "\n");
3461 VEC_replace (address_cost_data, address_cost_data_list,
3465 bits = GET_MODE_BITSIZE (address_mode);
3466 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3468 if ((offset >> (bits - 1) & 1))
3473 msize = GET_MODE_SIZE (mem_mode);
3474 autoinc_offset = offset;
3476 autoinc_offset += ratio * cstep;
3477 if (symbol_present || var_present || ratio != 1)
3479 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3481 || (has_postdec[mem_mode] && autoinc_offset == 0
3483 || (has_preinc[mem_mode] && autoinc_offset == msize
3485 || (has_predec[mem_mode] && autoinc_offset == -msize
3486 && msize == -cstep))
3490 offset_p = (s_offset != 0
3491 && data->min_offset <= s_offset
3492 && s_offset <= data->max_offset);
3493 ratio_p = (ratio != 1
3494 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3496 if (ratio != 1 && !ratio_p)
3497 cost += multiply_by_cost (ratio, address_mode, speed);
3499 if (s_offset && !offset_p && !symbol_present)
3500 cost += add_cost (address_mode, speed);
3503 *may_autoinc = autoinc;
3504 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3505 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3506 return new_cost (cost + acost, complexity);
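/* Worked example of the offset canonicalization above (hypothetical
   helper; assumes BITS <= HOST_BITS_PER_WIDE_INT): with a 32-bit address
   mode, MASK is 0xffffffff, and an offset such as 0xfffffff0, whose top
   bit is set, sign-extends to -16 instead of a huge unsigned constant.  */

static HOST_WIDE_INT ATTRIBUTE_UNUSED
sign_extend_offset_example (unsigned HOST_WIDE_INT offset, unsigned bits)
{
  unsigned HOST_WIDE_INT mask
    = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);

  offset &= mask;
  if ((offset >> (bits - 1)) & 1)
    offset |= ~mask;			/* Propagate the sign bit.  */

  return (HOST_WIDE_INT) offset;
}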
3509 /* Estimates cost of forcing expression EXPR into a variable. */
3512 force_expr_to_var_cost (tree expr, bool speed)
3514 static bool costs_initialized = false;
3515 static unsigned integer_cost [2];
3516 static unsigned symbol_cost [2];
3517 static unsigned address_cost [2];
3519 comp_cost cost0, cost1, cost;
3520 enum machine_mode mode;
3522 if (!costs_initialized)
3524 tree type = build_pointer_type (integer_type_node);
3529 var = create_tmp_var_raw (integer_type_node, "test_var");
3530 TREE_STATIC (var) = 1;
3531 x = produce_memory_decl_rtl (var, NULL);
3532 SET_DECL_RTL (var, x);
3534 addr = build1 (ADDR_EXPR, type, var);
3537 for (i = 0; i < 2; i++)
3539 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3542 symbol_cost[i] = computation_cost (addr, i) + 1;
3545 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3547 build_int_cst (sizetype, 2000)), i) + 1;
3548 if (dump_file && (dump_flags & TDF_DETAILS))
3550 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3551 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3552 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3553 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3554 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3555 fprintf (dump_file, "\n");
3559 costs_initialized = true;
3564 if (SSA_VAR_P (expr))
3567 if (is_gimple_min_invariant (expr))
3569 if (TREE_CODE (expr) == INTEGER_CST)
3570 return new_cost (integer_cost [speed], 0);
3572 if (TREE_CODE (expr) == ADDR_EXPR)
3574 tree obj = TREE_OPERAND (expr, 0);
3576 if (TREE_CODE (obj) == VAR_DECL
3577 || TREE_CODE (obj) == PARM_DECL
3578 || TREE_CODE (obj) == RESULT_DECL)
3579 return new_cost (symbol_cost [speed], 0);
3582 return new_cost (address_cost [speed], 0);
3585 switch (TREE_CODE (expr))
3587 case POINTER_PLUS_EXPR:
3591 op0 = TREE_OPERAND (expr, 0);
3592 op1 = TREE_OPERAND (expr, 1);
3596 if (is_gimple_val (op0))
3599 cost0 = force_expr_to_var_cost (op0, speed);
3601 if (is_gimple_val (op1))
3604 cost1 = force_expr_to_var_cost (op1, speed);
3609 op0 = TREE_OPERAND (expr, 0);
3613 if (is_gimple_val (op0))
3616 cost0 = force_expr_to_var_cost (op0, speed);
3622 /* Just an arbitrary value, FIXME. */
3623 return new_cost (target_spill_cost[speed], 0);
3626 mode = TYPE_MODE (TREE_TYPE (expr));
3627 switch (TREE_CODE (expr))
3629 case POINTER_PLUS_EXPR:
3633 cost = new_cost (add_cost (mode, speed), 0);
3637 if (cst_and_fits_in_hwi (op0))
3638 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3639 else if (cst_and_fits_in_hwi (op1))
3640 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3642 return new_cost (target_spill_cost [speed], 0);
3649 cost = add_costs (cost, cost0);
3650 cost = add_costs (cost, cost1);
3652 /* Bound the cost by target_spill_cost. The parts of complicated
3653 computations are often either loop invariant or at least can
3654 be shared between several iv uses, so letting this grow without
3655 limit would not give reasonable results. */
3656 if (cost.cost > (int) target_spill_cost [speed])
3657 cost.cost = target_spill_cost [speed];
3662 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3663 invariants the computation depends on. */
3666 force_var_cost (struct ivopts_data *data,
3667 tree expr, bitmap *depends_on)
3671 fd_ivopts_data = data;
3672 walk_tree (&expr, find_depends, depends_on, NULL);
3675 return force_expr_to_var_cost (expr, data->speed);
3678 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3679 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3680 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3681 invariants the computation depends on. */
3684 split_address_cost (struct ivopts_data *data,
3685 tree addr, bool *symbol_present, bool *var_present,
3686 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3689 HOST_WIDE_INT bitsize;
3690 HOST_WIDE_INT bitpos;
3692 enum machine_mode mode;
3693 int unsignedp, volatilep;
3695 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3696 &unsignedp, &volatilep, false);
3699 || bitpos % BITS_PER_UNIT != 0
3700 || TREE_CODE (core) != VAR_DECL)
3702 *symbol_present = false;
3703 *var_present = true;
3704 fd_ivopts_data = data;
3705 walk_tree (&addr, find_depends, depends_on, NULL);
3706 return new_cost (target_spill_cost[data->speed], 0);
3709 *offset += bitpos / BITS_PER_UNIT;
3710 if (TREE_STATIC (core)
3711 || DECL_EXTERNAL (core))
3713 *symbol_present = true;
3714 *var_present = false;
3718 *symbol_present = false;
3719 *var_present = true;
3723 /* Estimates cost of expressing difference of addresses E1 - E2 as
3724 var + symbol + offset. The value of offset is added to OFFSET,
3725 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3726 part is missing. DEPENDS_ON is a set of the invariants the computation
3727 depends on. */
3730 ptr_difference_cost (struct ivopts_data *data,
3731 tree e1, tree e2, bool *symbol_present, bool *var_present,
3732 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3734 HOST_WIDE_INT diff = 0;
3735 aff_tree aff_e1, aff_e2;
3738 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3740 if (ptr_difference_const (e1, e2, &diff))
3743 *symbol_present = false;
3744 *var_present = false;
3748 if (integer_zerop (e2))
3749 return split_address_cost (data, TREE_OPERAND (e1, 0),
3750 symbol_present, var_present, offset, depends_on);
3752 *symbol_present = false;
3753 *var_present = true;
3755 type = signed_type_for (TREE_TYPE (e1));
3756 tree_to_aff_combination (e1, type, &aff_e1);
3757 tree_to_aff_combination (e2, type, &aff_e2);
3758 aff_combination_scale (&aff_e2, double_int_minus_one);
3759 aff_combination_add (&aff_e1, &aff_e2);
3761 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3764 /* Estimates cost of expressing difference E1 - E2 as
3765 var + symbol + offset. The value of offset is added to OFFSET,
3766 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3767 part is missing. DEPENDS_ON is a set of the invariants the computation
3768 depends on. */
3771 difference_cost (struct ivopts_data *data,
3772 tree e1, tree e2, bool *symbol_present, bool *var_present,
3773 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3775 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3776 unsigned HOST_WIDE_INT off1, off2;
3777 aff_tree aff_e1, aff_e2;
3780 e1 = strip_offset (e1, &off1);
3781 e2 = strip_offset (e2, &off2);
3782 *offset += off1 - off2;
3787 if (TREE_CODE (e1) == ADDR_EXPR)
3788 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3789 offset, depends_on);
3790 *symbol_present = false;
3792 if (operand_equal_p (e1, e2, 0))
3794 *var_present = false;
3798 *var_present = true;
3800 if (integer_zerop (e2))
3801 return force_var_cost (data, e1, depends_on);
3803 if (integer_zerop (e1))
3805 comp_cost cost = force_var_cost (data, e2, depends_on);
3806 cost.cost += multiply_by_cost (-1, mode, data->speed);
3810 type = signed_type_for (TREE_TYPE (e1));
3811 tree_to_aff_combination (e1, type, &aff_e1);
3812 tree_to_aff_combination (e2, type, &aff_e2);
3813 aff_combination_scale (&aff_e2, double_int_minus_one);
3814 aff_combination_add (&aff_e1, &aff_e2);
3816 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3819 /* Returns true if AFF1 and AFF2 are identical. */
3822 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3826 if (aff1->n != aff2->n)
3829 for (i = 0; i < aff1->n; i++)
3831 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3834 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3840 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3841 requires a new compiler-generated temporary. Returns -1 otherwise.
3842 ADDRESS_P is a flag indicating if the expression is for address
3843 computation. */
3846 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3847 tree cbase, HOST_WIDE_INT ratio,
3850 aff_tree ubase_aff, cbase_aff;
3852 struct iv_inv_expr_ent ent;
3853 struct iv_inv_expr_ent **slot;
3860 if ((TREE_CODE (ubase) == INTEGER_CST)
3861 && (TREE_CODE (cbase) == INTEGER_CST))
3864 /* Strips the constant part. */
3865 if (TREE_CODE (ubase) == PLUS_EXPR
3866 || TREE_CODE (ubase) == MINUS_EXPR
3867 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3869 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3870 ubase = TREE_OPERAND (ubase, 0);
3873 /* Strips the constant part. */
3874 if (TREE_CODE (cbase) == PLUS_EXPR
3875 || TREE_CODE (cbase) == MINUS_EXPR
3876 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3878 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3879 cbase = TREE_OPERAND (cbase, 0);
3884 if (((TREE_CODE (ubase) == SSA_NAME)
3885 || (TREE_CODE (ubase) == ADDR_EXPR
3886 && is_gimple_min_invariant (ubase)))
3887 && (TREE_CODE (cbase) == INTEGER_CST))
3890 if (((TREE_CODE (cbase) == SSA_NAME)
3891 || (TREE_CODE (cbase) == ADDR_EXPR
3892 && is_gimple_min_invariant (cbase)))
3893 && (TREE_CODE (ubase) == INTEGER_CST))
3899 if (operand_equal_p (ubase, cbase, 0))
3902 if (TREE_CODE (ubase) == ADDR_EXPR
3903 && TREE_CODE (cbase) == ADDR_EXPR)
3907 usym = TREE_OPERAND (ubase, 0);
3908 csym = TREE_OPERAND (cbase, 0);
3909 if (TREE_CODE (usym) == ARRAY_REF)
3911 tree ind = TREE_OPERAND (usym, 1);
3912 if (TREE_CODE (ind) == INTEGER_CST
3913 && host_integerp (ind, 0)
3914 && TREE_INT_CST_LOW (ind) == 0)
3915 usym = TREE_OPERAND (usym, 0);
3917 if (TREE_CODE (csym) == ARRAY_REF)
3919 tree ind = TREE_OPERAND (csym, 1);
3920 if (TREE_CODE (ind) == INTEGER_CST
3921 && host_integerp (ind, 0)
3922 && TREE_INT_CST_LOW (ind) == 0)
3923 csym = TREE_OPERAND (csym, 0);
3925 if (operand_equal_p (usym, csym, 0))
3928 /* Now do a more complex comparison. */
3929 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3930 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3931 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3935 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3936 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3938 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3939 aff_combination_add (&ubase_aff, &cbase_aff);
3940 expr = aff_combination_to_tree (&ubase_aff);
3942 ent.hash = iterative_hash_expr (expr, 0);
3943 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3948 *slot = XNEW (struct iv_inv_expr_ent);
3949 (*slot)->expr = expr;
3950 (*slot)->hash = ent.hash;
3951 (*slot)->id = data->inv_expr_id++;
3957 /* Determines the cost of the computation by which USE is expressed
3958 from induction variable CAND. If ADDRESS_P is true, we just need
3959 to create an address from it, otherwise we want to get it into a
3960 register. A set of invariants we depend on is stored in
3961 DEPENDS_ON. AT is the statement at which the value is computed.
3962 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3963 addressing is likely. */
3966 get_computation_cost_at (struct ivopts_data *data,
3967 struct iv_use *use, struct iv_cand *cand,
3968 bool address_p, bitmap *depends_on, gimple at,
3972 tree ubase = use->iv->base, ustep = use->iv->step;
3974 tree utype = TREE_TYPE (ubase), ctype;
3975 unsigned HOST_WIDE_INT cstepi, offset = 0;
3976 HOST_WIDE_INT ratio, aratio;
3977 bool var_present, symbol_present, stmt_is_after_inc;
3980 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3984 /* Only consider real candidates. */
3986 return infinite_cost;
3988 cbase = cand->iv->base;
3989 cstep = cand->iv->step;
3990 ctype = TREE_TYPE (cbase);
3992 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3994 /* We do not have a precision to express the values of use. */
3995 return infinite_cost;
4000 /* Do not try to express address of an object with computation based
4001 on address of a different object. This may cause problems in rtl
4002 level alias analysis (that does not expect this to be happening,
4003 as this is illegal in C), and would be unlikely to be useful
4004 anyway. */
4005 if (use->iv->base_object
4006 && cand->iv->base_object
4007 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4008 return infinite_cost;
4011 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4013 /* TODO -- add direct handling of this case. */
4017 /* CSTEPI is removed from the offset in case the statement is after the
4018 increment. If the step is not constant, we use zero instead.
4019 This is a bit imprecise (there is the extra addition), but
4020 redundancy elimination is likely to transform the code so that
4021 it uses the value of the variable before the increment anyway,
4022 so this is not all that unrealistic. */
4023 if (cst_and_fits_in_hwi (cstep))
4024 cstepi = int_cst_value (cstep);
4028 if (!constant_multiple_of (ustep, cstep, &rat))
4029 return infinite_cost;
4031 if (double_int_fits_in_shwi_p (rat))
4032 ratio = double_int_to_shwi (rat);
4034 return infinite_cost;
4037 ctype = TREE_TYPE (cbase);
4039 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4041 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4042 or ratio == 1, it is better to handle this like
4044 ubase - ratio * cbase + ratio * var
4046 (this also holds in the case ratio == -1, TODO). */
4048 if (cst_and_fits_in_hwi (cbase))
4050 offset = - ratio * int_cst_value (cbase);
4051 cost = difference_cost (data,
4052 ubase, build_int_cst (utype, 0),
4053 &symbol_present, &var_present, &offset,
4055 cost.cost /= avg_loop_niter (data->current_loop);
4057 else if (ratio == 1)
4059 tree real_cbase = cbase;
4061 /* Check to see if any adjustment is needed. */
4062 if (cstepi == 0 && stmt_is_after_inc)
4064 aff_tree real_cbase_aff;
4067 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4069 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4071 aff_combination_add (&real_cbase_aff, &cstep_aff);
4072 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4075 cost = difference_cost (data,
4077 &symbol_present, &var_present, &offset,
4079 cost.cost /= avg_loop_niter (data->current_loop);
4082 && !POINTER_TYPE_P (ctype)
4083 && multiplier_allowed_in_address_p
4084 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4085 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4088 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4089 cost = difference_cost (data,
4091 &symbol_present, &var_present, &offset,
4093 cost.cost /= avg_loop_niter (data->current_loop);
4097 cost = force_var_cost (data, cbase, depends_on);
4098 cost = add_costs (cost,
4099 difference_cost (data,
4100 ubase, build_int_cst (utype, 0),
4101 &symbol_present, &var_present,
4102 &offset, depends_on));
4103 cost.cost /= avg_loop_niter (data->current_loop);
4104 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4110 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4111 /* Clear depends on. */
4112 if (*inv_expr_id != -1 && depends_on && *depends_on)
4113 bitmap_clear (*depends_on);
4116 /* If we are after the increment, the value of the candidate is higher by
4117 one iteration. */
4118 if (stmt_is_after_inc)
4119 offset -= ratio * cstepi;
4121 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4122 (symbol/var1/const parts may be omitted). If we are looking for an
4123 address, find the cost of addressing this. */
4125 return add_costs (cost,
4126 get_address_cost (symbol_present, var_present,
4127 offset, ratio, cstepi,
4128 TYPE_MODE (TREE_TYPE (utype)),
4129 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4130 speed, stmt_is_after_inc,
4133 /* Otherwise estimate the costs for computing the expression. */
4134 if (!symbol_present && !var_present && !offset)
4137 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4141 /* Symbol + offset should be compile-time computable so consider that they
4142 are added once to the variable, if present. */
4143 if (var_present && (symbol_present || offset))
4144 cost.cost += adjust_setup_cost (data,
4145 add_cost (TYPE_MODE (ctype), speed));
4147 /* Having an offset does not affect the runtime cost if it is added to
4148 a symbol, but it increases complexity. */
4152 cost.cost += add_cost (TYPE_MODE (ctype), speed);
4154 aratio = ratio > 0 ? ratio : -ratio;
4156 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4161 *can_autoinc = false;
4164 /* Just get the expression, expand it and measure the cost. */
4165 tree comp = get_computation_at (data->current_loop, use, cand, at);
4168 return infinite_cost;
4171 comp = build_simple_mem_ref (comp);
4173 return new_cost (computation_cost (comp, speed), 0);
4177 /* Determines the cost of the computation by which USE is expressed
4178 from induction variable CAND. If ADDRESS_P is true, we just need
4179 to create an address from it, otherwise we want to get it into a
4180 register. A set of invariants we depend on is stored in
4181 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4182 autoinc addressing is likely. */
4185 get_computation_cost (struct ivopts_data *data,
4186 struct iv_use *use, struct iv_cand *cand,
4187 bool address_p, bitmap *depends_on,
4188 bool *can_autoinc, int *inv_expr_id)
4190 return get_computation_cost_at (data,
4191 use, cand, address_p, depends_on, use->stmt,
4192 can_autoinc, inv_expr_id);
4195 /* Determines cost of basing replacement of USE on CAND in a generic
4196 expression. */
4199 determine_use_iv_cost_generic (struct ivopts_data *data,
4200 struct iv_use *use, struct iv_cand *cand)
4204 int inv_expr_id = -1;
4206 /* The simple case first -- if we need to express the value of the preserved
4207 original biv, the cost is 0. This also prevents us from counting the
4208 cost of the increment twice -- once at this use and once in the cost of
4209 the candidate. */
4210 if (cand->pos == IP_ORIGINAL
4211 && cand->incremented_at == use->stmt)
4213 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4217 cost = get_computation_cost (data, use, cand, false, &depends_on,
4218 NULL, &inv_expr_id);
4220 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4223 return !infinite_cost_p (cost);
4226 /* Determines cost of basing replacement of USE on CAND in an address. */
4229 determine_use_iv_cost_address (struct ivopts_data *data,
4230 struct iv_use *use, struct iv_cand *cand)
4234 int inv_expr_id = -1;
4235 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4236 &can_autoinc, &inv_expr_id);
4238 if (cand->ainc_use == use)
4241 cost.cost -= cand->cost_step;
4242 /* If we generated the candidate solely for exploiting autoincrement
4243 opportunities, and it turns out it can't be used, set the cost to
4244 infinity to make sure we ignore it. */
4245 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4246 cost = infinite_cost;
4248 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4251 return !infinite_cost_p (cost);
4254 /* Computes value of candidate CAND at position AT in iteration NITER, and
4255 stores it to VAL. */
4258 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4261 aff_tree step, delta, nit;
4262 struct iv *iv = cand->iv;
4263 tree type = TREE_TYPE (iv->base);
4264 tree steptype = type;
4265 if (POINTER_TYPE_P (type))
4266 steptype = sizetype;
4268 tree_to_aff_combination (iv->step, steptype, &step);
4269 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4270 aff_combination_convert (&nit, steptype);
4271 aff_combination_mult (&nit, &step, &delta);
4272 if (stmt_after_increment (loop, cand, at))
4273 aff_combination_add (&delta, &step);
4275 tree_to_aff_combination (iv->base, type, val);
4276 aff_combination_add (val, &delta);
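/* Concrete instance of the computation above (hypothetical numbers): a
   candidate with base 10 and step 3 evaluates to 10 + 3 * niter before
   its increment in iteration NITER, and to one more step after it --
   which is exactly why stmt_after_increment adds an extra STEP.  */

static long ATTRIBUTE_UNUSED
cand_value_example (long base, long step, long niter, bool after_inc_p)
{
  long delta = step * niter;

  if (after_inc_p)
    delta += step;

  return base + delta;
}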
4279 /* Returns the period of induction variable IV. */
4282 iv_period (struct iv *iv)
4284 tree step = iv->step, period, type;
4287 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4289 type = unsigned_type_for (TREE_TYPE (step));
4290 /* Period of the iv is lcm (step, type_range) / step - 1,
4291 i.e., N * type_range / step - 1. Since the type range is a power
4292 of two, N == step >> num_of_ending_zeros_binary (step),
4293 so the final result is
4295 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
4298 pow2div = num_ending_zeros (step);
4300 period = build_low_bits_mask (type,
4301 (TYPE_PRECISION (type)
4302 - tree_low_cst (pow2div, 1)));
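/* Worked example of the formula above (hypothetical helper; assumes
   STEP != 0 and PRECISION minus the trailing zero count is below the
   host word width): an 8-bit iv with step 4 has num_ending_zeros == 2,
   so its period is (256 >> 2) - 1 == 63 increments before the values
   start repeating modulo 2^8.  */

static unsigned HOST_WIDE_INT ATTRIBUTE_UNUSED
iv_period_example (unsigned HOST_WIDE_INT step, unsigned precision)
{
  unsigned pow2div = 0;

  while (!(step & 1))		/* num_ending_zeros (STEP).  */
    {
      step >>= 1;
      pow2div++;
    }

  /* build_low_bits_mask (type, precision - pow2div).  */
  return (((unsigned HOST_WIDE_INT) 1) << (precision - pow2div)) - 1;
}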
4307 /* Returns the comparison operator used when eliminating the iv USE. */
4309 static enum tree_code
4310 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4312 struct loop *loop = data->current_loop;
4316 ex_bb = gimple_bb (use->stmt);
4317 exit = EDGE_SUCC (ex_bb, 0);
4318 if (flow_bb_inside_loop_p (loop, exit->dest))
4319 exit = EDGE_SUCC (ex_bb, 1);
4321 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4324 /* Check whether it is possible to express the condition in USE by comparison
4325 of candidate CAND. If so, store the value compared with to BOUND. */
4328 may_eliminate_iv (struct ivopts_data *data,
4329 struct iv_use *use, struct iv_cand *cand, tree *bound)
4334 struct loop *loop = data->current_loop;
4336 struct tree_niter_desc *desc = NULL;
4338 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4341 /* For now this works only for exits that dominate the loop latch.
4342 TODO: extend to other conditions inside loop body. */
4343 ex_bb = gimple_bb (use->stmt);
4344 if (use->stmt != last_stmt (ex_bb)
4345 || gimple_code (use->stmt) != GIMPLE_COND
4346 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4349 exit = EDGE_SUCC (ex_bb, 0);
4350 if (flow_bb_inside_loop_p (loop, exit->dest))
4351 exit = EDGE_SUCC (ex_bb, 1);
4352 if (flow_bb_inside_loop_p (loop, exit->dest))
4355 nit = niter_for_exit (data, exit, &desc);
4359 /* Determine whether we can use the variable to test the exit condition.
4360 This is the case iff the period of the induction variable is greater
4361 than the number of iterations for which the exit condition is true. */
4362 period = iv_period (cand->iv);
4364 /* If the number of iterations is constant, compare against it directly. */
4365 if (TREE_CODE (nit) == INTEGER_CST)
4367 /* See cand_value_at. */
4368 if (stmt_after_increment (loop, cand, use->stmt))
4370 if (!tree_int_cst_lt (nit, period))
4375 if (tree_int_cst_lt (period, nit))
4380 /* If not, and if this is the only possible exit of the loop, see whether
4381 we can get a conservative estimate on the number of iterations of the
4382 entire loop and compare against that instead. */
4385 double_int period_value, max_niter;
4387 max_niter = desc->max;
4388 if (stmt_after_increment (loop, cand, use->stmt))
4389 max_niter = double_int_add (max_niter, double_int_one);
4390 period_value = tree_to_double_int (period);
4391 if (double_int_ucmp (max_niter, period_value) > 0)
4393 /* See if we can take advantage of inferred loop bound information. */
4394 if (loop_only_exit_p (loop, exit))
4396 if (!estimated_loop_iterations (loop, true, &max_niter))
4398 /* The loop bound is already adjusted by adding 1. */
4399 if (double_int_ucmp (max_niter, period_value) > 0)
4407 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4409 *bound = aff_combination_to_tree (&bnd);
4410 /* It is unlikely that computing the number of iterations using division
4411 would be more profitable than keeping the original induction variable. */
4412 if (expression_expensive_p (*bound))
4418 /* Determines cost of basing replacement of USE on CAND in a condition. */
4421 determine_use_iv_cost_condition (struct ivopts_data *data,
4422 struct iv_use *use, struct iv_cand *cand)
4424 tree bound = NULL_TREE;
4426 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4427 comp_cost elim_cost, express_cost, cost;
4429 int inv_expr_id = -1;
4430 tree *control_var, *bound_cst;
4432 /* Only consider real candidates. */
4435 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4439 /* Try iv elimination. */
4440 if (may_eliminate_iv (data, use, cand, &bound))
4442 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4443 /* The bound is a loop invariant, so it will be computed only
4444 once. */
4445 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4448 elim_cost = infinite_cost;
4450 /* Try expressing the original giv. If it is compared with an invariant,
4451 note that we cannot get rid of it. */
4452 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4456 /* When the condition is a comparison of the candidate IV against
4457 zero, prefer this IV.
4459 TODO: The constant that we're subtracting from the cost should
4460 be target-dependent. This information should be added to the
4461 target costs for each backend. */
4462 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4463 && integer_zerop (*bound_cst)
4464 && (operand_equal_p (*control_var, cand->var_after, 0)
4465 || operand_equal_p (*control_var, cand->var_before, 0)))
4466 elim_cost.cost -= 1;
4468 express_cost = get_computation_cost (data, use, cand, false,
4469 &depends_on_express, NULL,
4471 fd_ivopts_data = data;
4472 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4474 /* Choose the better approach, preferring the eliminated IV. */
4475 if (compare_costs (elim_cost, express_cost) <= 0)
4478 depends_on = depends_on_elim;
4479 depends_on_elim = NULL;
4483 cost = express_cost;
4484 depends_on = depends_on_express;
4485 depends_on_express = NULL;
4489 set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4491 if (depends_on_elim)
4492 BITMAP_FREE (depends_on_elim);
4493 if (depends_on_express)
4494 BITMAP_FREE (depends_on_express);
4496 return !infinite_cost_p (cost);
4499 /* Determines cost of basing replacement of USE on CAND. Returns false
4500 if USE cannot be based on CAND. */
4503 determine_use_iv_cost (struct ivopts_data *data,
4504 struct iv_use *use, struct iv_cand *cand)
4508 case USE_NONLINEAR_EXPR:
4509 return determine_use_iv_cost_generic (data, use, cand);
4512 return determine_use_iv_cost_address (data, use, cand);
4515 return determine_use_iv_cost_condition (data, use, cand);
4522 /* Return true if get_computation_cost indicates that autoincrement is
4523 a possibility for the pair of USE and CAND, false otherwise. */
4526 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4527 struct iv_cand *cand)
4533 if (use->type != USE_ADDRESS)
4536 cost = get_computation_cost (data, use, cand, true, &depends_on,
4537 &can_autoinc, NULL);
4539 BITMAP_FREE (depends_on);
4541 return !infinite_cost_p (cost) && can_autoinc;
4544 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4545 use that allows autoincrement, and set their AINC_USE if possible. */
4548 set_autoinc_for_original_candidates (struct ivopts_data *data)
4552 for (i = 0; i < n_iv_cands (data); i++)
4554 struct iv_cand *cand = iv_cand (data, i);
4555 struct iv_use *closest = NULL;
4556 if (cand->pos != IP_ORIGINAL)
4558 for (j = 0; j < n_iv_uses (data); j++)
4560 struct iv_use *use = iv_use (data, j);
4561 unsigned uid = gimple_uid (use->stmt);
4562 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4563 || uid > gimple_uid (cand->incremented_at))
4565 if (closest == NULL || uid > gimple_uid (closest->stmt))
4568 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4570 cand->ainc_use = closest;
4574 /* Finds the candidates for the induction variables. */
4577 find_iv_candidates (struct ivopts_data *data)
4579 /* Add commonly used ivs. */
4580 add_standard_iv_candidates (data);
4582 /* Add old induction variables. */
4583 add_old_ivs_candidates (data);
4585 /* Add induction variables derived from uses. */
4586 add_derived_ivs_candidates (data);
4588 set_autoinc_for_original_candidates (data);
4590 /* Record the important candidates. */
4591 record_important_candidates (data);
4594 /* Determines costs of basing the use of the iv on an iv candidate. */
4597 determine_use_iv_costs (struct ivopts_data *data)
4601 struct iv_cand *cand;
4602 bitmap to_clear = BITMAP_ALLOC (NULL);
4604 alloc_use_cost_map (data);
4606 for (i = 0; i < n_iv_uses (data); i++)
4608 use = iv_use (data, i);
4610 if (data->consider_all_candidates)
4612 for (j = 0; j < n_iv_cands (data); j++)
4614 cand = iv_cand (data, j);
4615 determine_use_iv_cost (data, use, cand);
4622 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4624 cand = iv_cand (data, j);
4625 if (!determine_use_iv_cost (data, use, cand))
4626 bitmap_set_bit (to_clear, j);
4629 /* Remove the candidates for which the cost is infinite from
4630 the list of related candidates. */
4631 bitmap_and_compl_into (use->related_cands, to_clear);
4632 bitmap_clear (to_clear);
4636 BITMAP_FREE (to_clear);
4638 if (dump_file && (dump_flags & TDF_DETAILS))
4640 fprintf (dump_file, "Use-candidate costs:\n");
4642 for (i = 0; i < n_iv_uses (data); i++)
4644 use = iv_use (data, i);
4646 fprintf (dump_file, "Use %d:\n", i);
4647 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4648 for (j = 0; j < use->n_map_members; j++)
4650 if (!use->cost_map[j].cand
4651 || infinite_cost_p (use->cost_map[j].cost))
4654 fprintf (dump_file, " %d\t%d\t%d\t",
4655 use->cost_map[j].cand->id,
4656 use->cost_map[j].cost.cost,
4657 use->cost_map[j].cost.complexity);
4658 if (use->cost_map[j].depends_on)
4659 bitmap_print (dump_file,
4660 use->cost_map[j].depends_on, "","");
4661 if (use->cost_map[j].inv_expr_id != -1)
4662 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4663 fprintf (dump_file, "\n");
4666 fprintf (dump_file, "\n");
4668 fprintf (dump_file, "\n");
4672 /* Determines cost of the candidate CAND. */
4675 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4677 comp_cost cost_base;
4678 unsigned cost, cost_step;
4687 /* There are two costs associated with the candidate -- its increment
4688 and its initialization. The second is almost negligible for any loop
4689 that rolls enough, so we weight it only lightly. */
4691 base = cand->iv->base;
4692 cost_base = force_var_cost (data, base, NULL);
4693 /* It will be exceptional that the iv register happens to be initialized with
4694 the proper value at no cost. In general, there will at least be a regcopy
4695 or a save. */
4696 if (cost_base.cost == 0)
4697 cost_base.cost = COSTS_N_INSNS (1);
4698 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4700 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4702 /* Prefer the original ivs unless we may gain something by replacing them.
4703 The reason is to make debugging simpler; so this is not relevant for
4704 artificial ivs created by other optimization passes. */
4705 if (cand->pos != IP_ORIGINAL
4706 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4709 /* Prefer not to insert statements into the latch unless there are some
4710 already (so that we do not create unnecessary jumps). */
4711 if (cand->pos == IP_END
4712 && empty_block_p (ip_end_pos (data->current_loop)))
4716 cand->cost_step = cost_step;
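/* Illustrative numbers for the cost above (hypothetical, simplified):
   with a per-iteration increment cost of 1, a base that costs 4 to force
   into a register, and an average trip count of 5, the candidate is
   charged 1 + 4 / 5 == 1 when optimizing for speed -- the setup is
   amortized over the loop, the increment is not.  */

static unsigned ATTRIBUTE_UNUSED
iv_cost_example (unsigned cost_base, unsigned cost_step, unsigned avg_niter)
{
  if (cost_base == 0)
    cost_base = 1;		/* Stand-in for COSTS_N_INSNS (1).  */

  return cost_step + cost_base / avg_niter;
}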
4719 /* Determines costs of computation of the candidates. */
4722 determine_iv_costs (struct ivopts_data *data)
4726 if (dump_file && (dump_flags & TDF_DETAILS))
4728 fprintf (dump_file, "Candidate costs:\n");
4729 fprintf (dump_file, " cand\tcost\n");
4732 for (i = 0; i < n_iv_cands (data); i++)
4734 struct iv_cand *cand = iv_cand (data, i);
4736 determine_iv_cost (data, cand);
4738 if (dump_file && (dump_flags & TDF_DETAILS))
4739 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4742 if (dump_file && (dump_flags & TDF_DETAILS))
4743 fprintf (dump_file, "\n");
4746 /* Calculates cost for having SIZE induction variables. */
4749 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4751 /* We add size to the cost, so that we prefer eliminating ivs
4752 if possible. */
4753 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4754 data->body_includes_call);
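/* Sketch of the size penalty above (hypothetical toy pressure model,
   much simpler than estimate_reg_pressure_cost): adding SIZE itself to
   the cost biases the search toward smaller iv sets, and an extra
   penalty only appears once the ivs plus other live values exceed the
   available registers.  */

static unsigned ATTRIBUTE_UNUSED
global_cost_example (unsigned size, unsigned regs_used, unsigned avail_regs)
{
  unsigned pressure = 0;

  if (size + regs_used > avail_regs)
    pressure = size + regs_used - avail_regs;	/* Pretend each spill costs 1.  */

  return size + pressure;
}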
4757 /* For each size of the induction variable set determine the penalty. */
4760 determine_set_costs (struct ivopts_data *data)
4764 gimple_stmt_iterator psi;
4766 struct loop *loop = data->current_loop;
4769 if (dump_file && (dump_flags & TDF_DETAILS))
4771 fprintf (dump_file, "Global costs:\n");
4772 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4773 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
4774 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4775 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4779 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4781 phi = gsi_stmt (psi);
4782 op = PHI_RESULT (phi);
4784 if (!is_gimple_reg (op))
4787 if (get_iv (data, op))
4793 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4795 struct version_info *info = ver_info (data, j);
4797 if (info->inv_id && info->has_nonlin_use)
4801 data->regs_used = n;
4802 if (dump_file && (dump_flags & TDF_DETAILS))
4803 fprintf (dump_file, " regs_used %d\n", n);
4805 if (dump_file && (dump_flags & TDF_DETAILS))
4807 fprintf (dump_file, " cost for size:\n");
4808 fprintf (dump_file, " ivs\tcost\n");
4809 for (j = 0; j <= 2 * target_avail_regs; j++)
4810 fprintf (dump_file, " %d\t%d\n", j,
4811 ivopts_global_cost_for_size (data, j));
4812 fprintf (dump_file, "\n");
4816 /* Returns true if A is a cheaper cost pair than B. */
4819 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4829 cmp = compare_costs (a->cost, b->cost);
4836 /* In case the costs are the same, prefer the cheaper candidate. */
4837 if (a->cand->cost < b->cand->cost)
4844 /* Returns the candidate by which USE is expressed in IVS. */
4846 static struct cost_pair *
4847 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4849 return ivs->cand_for_use[use->id];
4852 /* Computes the cost field of IVS structure. */
4854 static void
4855 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4856 {
4857 comp_cost cost = ivs->cand_use_cost;
4859 cost.cost += ivs->cand_cost;
4861 cost.cost += ivopts_global_cost_for_size (data,
4862 ivs->n_regs + ivs->num_used_inv_expr);
4864 ivs->cost = cost;
4865 }
4867 /* Removes the invariants in set INVS from set IVS. */
4869 static void
4870 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4871 {
4872 bitmap_iterator bi;
4873 unsigned iid;
4875 if (!invs)
4876 return;
4878 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4880 ivs->n_invariant_uses[iid]--;
4881 if (ivs->n_invariant_uses[iid] == 0)
4882 ivs->n_regs--;
4883 }
4884 }
4886 /* Set USE not to be expressed by any candidate in IVS. */
4888 static void
4889 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4890 struct iv_use *use)
4891 {
4892 unsigned uid = use->id, cid;
4893 struct cost_pair *cp;
4895 cp = ivs->cand_for_use[uid];
4896 if (!cp)
4897 return;
4899 cid = cp->cand->id;
4900 ivs->bad_uses++;
4901 ivs->cand_for_use[uid] = NULL;
4902 ivs->n_cand_uses[cid]--;
4904 if (ivs->n_cand_uses[cid] == 0)
4906 bitmap_clear_bit (ivs->cands, cid);
4907 /* Do not count the pseudocandidates. */
4908 if (cp->cand->iv)
4909 ivs->n_regs--;
4910 ivs->n_cands--;
4911 ivs->cand_cost -= cp->cand->cost;
4913 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4914 }
4916 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4918 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4920 if (cp->inv_expr_id != -1)
4922 ivs->used_inv_expr[cp->inv_expr_id]--;
4923 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
4924 ivs->num_used_inv_expr--;
4925 }
4926 iv_ca_recount_cost (data, ivs);
4927 }
4929 /* Adds the invariants in set INVS to set IVS. */
4931 static void
4932 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4933 {
4934 bitmap_iterator bi;
4935 unsigned iid;
4937 if (!invs)
4938 return;
4940 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4942 ivs->n_invariant_uses[iid]++;
4943 if (ivs->n_invariant_uses[iid] == 1)
4944 ivs->n_regs++;
4945 }
4946 }
4948 /* Set cost pair for USE in set IVS to CP. */
4950 static void
4951 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4952 struct iv_use *use, struct cost_pair *cp)
4953 {
4954 unsigned uid = use->id, cid;
4956 if (ivs->cand_for_use[uid] == cp)
4957 return;
4959 if (ivs->cand_for_use[uid])
4960 iv_ca_set_no_cp (data, ivs, use);
4962 if (cp)
4963 {
4964 cid = cp->cand->id;
4966 ivs->bad_uses--;
4967 ivs->cand_for_use[uid] = cp;
4968 ivs->n_cand_uses[cid]++;
4969 if (ivs->n_cand_uses[cid] == 1)
4971 bitmap_set_bit (ivs->cands, cid);
4972 /* Do not count the pseudocandidates. */
4973 if (cp->cand->iv)
4974 ivs->n_regs++;
4975 ivs->n_cands++;
4976 ivs->cand_cost += cp->cand->cost;
4978 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4979 }
4981 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4982 iv_ca_set_add_invariants (ivs, cp->depends_on);
4984 if (cp->inv_expr_id != -1)
4986 ivs->used_inv_expr[cp->inv_expr_id]++;
4987 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
4988 ivs->num_used_inv_expr++;
4990 iv_ca_recount_cost (data, ivs);
4994 /* Extend set IVS by expressing USE by some of the candidates in it
4995 if possible. All important candidates will be considered
4996 if IMPORTANT_CANDIDATES is true. */
4998 static void
4999 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5000 struct iv_use *use, bool important_candidates)
5001 {
5002 struct cost_pair *best_cp = NULL, *cp;
5003 bitmap_iterator bi;
5004 unsigned i;
5005 bitmap cands;
5007 gcc_assert (ivs->upto >= use->id);
5009 if (ivs->upto == use->id)
5010 {
5011 ivs->upto++;
5012 ivs->bad_uses++;
5013 }
5015 cands = (important_candidates ? data->important_candidates : ivs->cands);
5016 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5018 struct iv_cand *cand = iv_cand (data, i);
5020 cp = get_use_iv_cost (data, use, cand);
5022 if (cheaper_cost_pair (cp, best_cp))
5023 best_cp = cp;
5024 }
5026 iv_ca_set_cp (data, ivs, use, best_cp);
5027 }
5029 /* Get cost for assignment IVS. */
5031 static comp_cost
5032 iv_ca_cost (struct iv_ca *ivs)
5033 {
5034 /* This was a conditional expression but it triggered a bug in
5035 Sun C 5.5. */
5036 if (ivs->bad_uses)
5037 return infinite_cost;
5038 else
5039 return ivs->cost;
5040 }
5042 /* Returns true if all dependences of CP are among invariants in IVS. */
5045 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5050 if (!cp->depends_on)
5051 return true;
5053 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5055 if (ivs->n_invariant_uses[i] == 0)
5056 return false;
5057 }
5059 return true;
5060 }
5062 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP, and
5063 chains it before NEXT_CHANGE. */
5065 static struct iv_ca_delta *
5066 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5067 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5069 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5071 change->use = use;
5072 change->old_cp = old_cp;
5073 change->new_cp = new_cp;
5074 change->next_change = next_change;
5076 return change;
5077 }
5079 /* Joins two lists of changes L1 and L2. Destructive -- the old lists
5080 are not usable afterwards. */
5082 static struct iv_ca_delta *
5083 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5085 struct iv_ca_delta *last;
5087 if (!l2)
5088 return l1;
5090 if (!l1)
5091 return l2;
5093 for (last = l1; last->next_change; last = last->next_change)
5094 continue;
5095 last->next_change = l2;
5097 return l1;
5098 }
5100 /* Reverses the list of changes DELTA, forming its inverse. */
5102 static struct iv_ca_delta *
5103 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5105 struct iv_ca_delta *act, *next, *prev = NULL;
5106 struct cost_pair *tmp;
5108 for (act = delta; act; act = next)
5110 next = act->next_change;
5111 act->next_change = prev;
5112 prev = act;
5114 tmp = act->old_cp;
5115 act->old_cp = act->new_cp;
5116 act->new_cp = tmp;
5117 }
5119 return prev;
5120 }
5122 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5123 reverted instead. */
5125 static void
5126 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5127 struct iv_ca_delta *delta, bool forward)
5129 struct cost_pair *from, *to;
5130 struct iv_ca_delta *act;
5132 if (!forward)
5133 delta = iv_ca_delta_reverse (delta);
5135 for (act = delta; act; act = act->next_change)
5136 {
5137 from = act->old_cp;
5138 to = act->new_cp;
5139 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5140 iv_ca_set_cp (data, ivs, act->use, to);
5141 }
5143 if (!forward)
5144 iv_ca_delta_reverse (delta);
5145 }
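/* A delta list therefore supports the try-measure-undo pattern used by
   iv_ca_extend and iv_ca_narrow below:

     *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
     iv_ca_delta_commit (data, ivs, *delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, *delta, false);

   which measures the cost of the modified assignment while leaving IVS
   unchanged unless the delta is eventually accepted.  */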
5147 /* Returns true if CAND is used in IVS. */
5149 static bool
5150 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5151 {
5152 return ivs->n_cand_uses[cand->id] > 0;
5153 }
5155 /* Returns number of induction variable candidates in the set IVS. */
5157 static unsigned
5158 iv_ca_n_cands (struct iv_ca *ivs)
5159 {
5160 return ivs->n_cands;
5161 }
5163 /* Free the list of changes DELTA. */
5165 static void
5166 iv_ca_delta_free (struct iv_ca_delta **delta)
5167 {
5168 struct iv_ca_delta *act, *next;
5170 for (act = *delta; act; act = next)
5172 next = act->next_change;
5173 free (act);
5174 }
5176 *delta = NULL;
5177 }
5179 /* Allocates new iv candidates assignment. */
5181 static struct iv_ca *
5182 iv_ca_new (struct ivopts_data *data)
5184 struct iv_ca *nw = XNEW (struct iv_ca);
5186 nw->upto = 0;
5187 nw->bad_uses = 0;
5188 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5189 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5190 nw->cands = BITMAP_ALLOC (NULL);
5191 nw->n_cands = 0;
5192 nw->n_regs = 0;
5193 nw->cand_use_cost = zero_cost;
5194 nw->cand_cost = 0;
5195 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5196 nw->cost = zero_cost;
5197 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5198 nw->num_used_inv_expr = 0;
5200 return nw;
5201 }
5203 /* Free memory occupied by the set IVS. */
5205 static void
5206 iv_ca_free (struct iv_ca **ivs)
5207 {
5208 free ((*ivs)->cand_for_use);
5209 free ((*ivs)->n_cand_uses);
5210 BITMAP_FREE ((*ivs)->cands);
5211 free ((*ivs)->n_invariant_uses);
5212 free ((*ivs)->used_inv_expr);
5213 free (*ivs);
5214 *ivs = NULL;
5215 }
5217 /* Dumps IVS to FILE. */
5219 static void
5220 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5221 {
5222 const char *pref = " invariants ";
5223 unsigned i;
5224 comp_cost cost = iv_ca_cost (ivs);
5226 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5227 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5228 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5229 bitmap_print (file, ivs->cands, " candidates: ","\n");
5231 for (i = 0; i < ivs->upto; i++)
5233 struct iv_use *use = iv_use (data, i);
5234 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5235 if (cp)
5236 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5237 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5238 else
5239 fprintf (file, " use:%d --> ??\n", use->id);
5240 }
5242 for (i = 1; i <= data->max_inv_id; i++)
5243 if (ivs->n_invariant_uses[i])
5245 fprintf (file, "%s%d", pref, i);
5246 pref = ", ";
5247 }
5248 fprintf (file, "\n\n");
5249 }
5251 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5252 new set, and store differences in DELTA. The number of induction variables
5253 in the new set is stored to *N_IVS. When MIN_NCAND is true, the function
5254 tries to find a solution with a minimal number of iv candidates. */
5256 static comp_cost
5257 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5258 struct iv_cand *cand, struct iv_ca_delta **delta,
5259 unsigned *n_ivs, bool min_ncand)
5261 unsigned i;
5262 comp_cost cost;
5263 struct iv_use *use;
5264 struct cost_pair *old_cp, *new_cp;
5266 *delta = NULL;
5267 for (i = 0; i < ivs->upto; i++)
5269 use = iv_use (data, i);
5270 old_cp = iv_ca_cand_for_use (ivs, use);
5272 if (old_cp
5273 && old_cp->cand == cand)
5274 continue;
5276 new_cp = get_use_iv_cost (data, use, cand);
5277 if (!new_cp)
5278 continue;
5280 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5281 continue;
5283 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5284 continue;
5286 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5287 }
5289 iv_ca_delta_commit (data, ivs, *delta, true);
5290 cost = iv_ca_cost (ivs);
5291 if (n_ivs)
5292 *n_ivs = iv_ca_n_cands (ivs);
5293 iv_ca_delta_commit (data, ivs, *delta, false);
5295 return cost;
5296 }
5298 /* Try narrowing set IVS by removing CAND. Return the cost of
5299 the new set and store the differences in DELTA. */
5301 static comp_cost
5302 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5303 struct iv_cand *cand, struct iv_ca_delta **delta)
5305 unsigned i, ci;
5306 struct iv_use *use;
5307 struct cost_pair *old_cp, *new_cp, *cp;
5308 bitmap_iterator bi;
5309 struct iv_cand *cnd;
5310 comp_cost cost;
5312 *delta = NULL;
5313 for (i = 0; i < n_iv_uses (data); i++)
5315 use = iv_use (data, i);
5317 old_cp = iv_ca_cand_for_use (ivs, use);
5318 if (old_cp->cand != cand)
5319 continue;
5321 new_cp = NULL;
5323 if (data->consider_all_candidates)
5325 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5326 {
5327 if (ci == cand->id)
5328 continue;
5330 cnd = iv_cand (data, ci);
5332 cp = get_use_iv_cost (data, use, cnd);
5333 if (!cp)
5334 continue;
5336 if (!iv_ca_has_deps (ivs, cp))
5337 continue;
5339 if (!cheaper_cost_pair (cp, new_cp))
5340 continue;
5342 new_cp = cp;
5343 }
5344 }
5345 else
5346 {
5347 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5348 {
5349 if (ci == cand->id)
5350 continue;
5352 cnd = iv_cand (data, ci);
5354 cp = get_use_iv_cost (data, use, cnd);
5355 if (!cp)
5356 continue;
5357 if (!iv_ca_has_deps (ivs, cp))
5358 continue;
5360 if (!cheaper_cost_pair (cp, new_cp))
5361 continue;
5363 new_cp = cp;
5364 }
5365 }
5367 if (!new_cp)
5368 {
5369 iv_ca_delta_free (delta);
5370 return infinite_cost;
5371 }
5373 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5376 iv_ca_delta_commit (data, ivs, *delta, true);
5377 cost = iv_ca_cost (ivs);
5378 iv_ca_delta_commit (data, ivs, *delta, false);
5380 return cost;
5381 }
5383 /* Try optimizing the set of candidates IVS by removing candidates other
5384 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5385 differences in DELTA. */
5387 static comp_cost
5388 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5389 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5390 {
5391 bitmap_iterator bi;
5392 struct iv_ca_delta *act_delta, *best_delta;
5393 unsigned i;
5394 comp_cost best_cost, acost;
5395 struct iv_cand *cand;
5397 best_delta = NULL;
5398 best_cost = iv_ca_cost (ivs);
5400 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5402 cand = iv_cand (data, i);
5404 if (cand == except_cand)
5405 continue;
5407 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5409 if (compare_costs (acost, best_cost) < 0)
5410 {
5411 best_cost = acost;
5412 iv_ca_delta_free (&best_delta);
5413 best_delta = act_delta;
5414 }
5415 else
5416 iv_ca_delta_free (&act_delta);
5417 }
5419 if (!best_delta)
5420 {
5421 *delta = NULL;
5422 return best_cost;
5423 }
5425 /* Recurse to possibly remove other unnecessary ivs. */
5426 iv_ca_delta_commit (data, ivs, best_delta, true);
5427 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5428 iv_ca_delta_commit (data, ivs, best_delta, false);
5429 *delta = iv_ca_delta_join (best_delta, *delta);
5431 return best_cost;
5432 }
5433 /* Tries to extend the set IVS in the best possible way in order
5434 to express the USE. If ORIGINALP is true, prefer candidates from
5435 the original set of IVs, otherwise favor important candidates not
5436 based on any memory object. */
5438 static bool
5439 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5440 struct iv_use *use, bool originalp)
5441 {
5442 comp_cost best_cost, act_cost;
5443 unsigned i;
5444 bitmap_iterator bi;
5445 struct iv_cand *cand;
5446 struct iv_ca_delta *best_delta = NULL, *act_delta;
5447 struct cost_pair *cp;
5449 iv_ca_add_use (data, ivs, use, false);
5450 best_cost = iv_ca_cost (ivs);
5452 cp = iv_ca_cand_for_use (ivs, use);
5453 if (!cp)
5454 {
5455 ivs->upto--;
5456 ivs->bad_uses--;
5457 iv_ca_add_use (data, ivs, use, true);
5458 best_cost = iv_ca_cost (ivs);
5459 cp = iv_ca_cand_for_use (ivs, use);
5460 }
5461 if (cp)
5462 {
5463 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5464 iv_ca_set_no_cp (data, ivs, use);
5465 }
5467 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5468 first try important candidates not based on any memory object. Only if
5469 this fails, try the specific ones. Rationale -- in loops with many
5470 variables the best choice often is to use just one generic biv. If we
5471 added here many ivs specific to the uses, the optimization algorithm later
5472 would be likely to get stuck in a local minimum, thus causing us to create
5473 too many ivs. The approach from few ivs to more seems more likely to be
5474 successful -- starting from few ivs, replacing an expensive use by a
5475 specific iv should always be a win. */
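/* For instance, in a loop that walks a[i], b[i] and c[i], a single generic
   biv with the three addresses expressed as BASE_x + STEP * biv is usually
   cheaper than three address-specific ivs; seeding the solution with the
   specific candidates would make that assignment hard to reach.  */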
5476 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5478 cand = iv_cand (data, i);
5480 if (originalp && cand->pos != IP_ORIGINAL)
5481 continue;
5483 if (!originalp && cand->iv->base_object != NULL_TREE)
5484 continue;
5486 if (iv_ca_cand_used_p (ivs, cand))
5487 continue;
5489 cp = get_use_iv_cost (data, use, cand);
5490 if (!cp)
5491 continue;
5493 iv_ca_set_cp (data, ivs, use, cp);
5494 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5495 true);
5496 iv_ca_set_no_cp (data, ivs, use);
5497 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5499 if (compare_costs (act_cost, best_cost) < 0)
5501 best_cost = act_cost;
5503 iv_ca_delta_free (&best_delta);
5504 best_delta = act_delta;
5507 iv_ca_delta_free (&act_delta);
5510 if (infinite_cost_p (best_cost))
5512 for (i = 0; i < use->n_map_members; i++)
5514 cp = use->cost_map + i;
5515 cand = cp->cand;
5516 if (!cand)
5517 continue;
5519 /* Already tried this. */
5520 if (cand->important)
5521 {
5522 if (originalp && cand->pos == IP_ORIGINAL)
5523 continue;
5524 if (!originalp && cand->iv->base_object == NULL_TREE)
5525 continue;
5526 }
5528 if (iv_ca_cand_used_p (ivs, cand))
5529 continue;
5531 act_delta = NULL;
5532 iv_ca_set_cp (data, ivs, use, cp);
5533 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5534 iv_ca_set_no_cp (data, ivs, use);
5535 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5536 cp, act_delta);
5538 if (compare_costs (act_cost, best_cost) < 0)
5540 best_cost = act_cost;
5543 iv_ca_delta_free (&best_delta);
5544 best_delta = act_delta;
5547 iv_ca_delta_free (&act_delta);
5551 iv_ca_delta_commit (data, ivs, best_delta, true);
5552 iv_ca_delta_free (&best_delta);
5554 return !infinite_cost_p (best_cost);
5557 /* Finds an initial assignment of candidates to uses. */
5559 static struct iv_ca *
5560 get_initial_solution (struct ivopts_data *data, bool originalp)
5562 struct iv_ca *ivs = iv_ca_new (data);
5563 unsigned i;
5565 for (i = 0; i < n_iv_uses (data); i++)
5566 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5567 {
5568 iv_ca_free (&ivs);
5569 return NULL;
5570 }
5572 return ivs;
5573 }
5575 /* Tries to improve set of induction variables IVS. */
5577 static bool
5578 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5579 {
5580 unsigned i, n_ivs;
5581 comp_cost acost, best_cost = iv_ca_cost (ivs);
5582 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5583 struct iv_cand *cand;
5585 /* Try extending the set of induction variables by one. */
5586 for (i = 0; i < n_iv_cands (data); i++)
5588 cand = iv_cand (data, i);
5590 if (iv_ca_cand_used_p (ivs, cand))
5591 continue;
5593 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5594 if (!act_delta)
5595 continue;
5597 /* If we successfully added the candidate and the set is small enough,
5598 try optimizing it by removing other candidates. */
5599 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5600 {
5601 iv_ca_delta_commit (data, ivs, act_delta, true);
5602 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5603 iv_ca_delta_commit (data, ivs, act_delta, false);
5604 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5605 }
5607 if (compare_costs (acost, best_cost) < 0)
5608 {
5609 best_cost = acost;
5610 iv_ca_delta_free (&best_delta);
5611 best_delta = act_delta;
5612 }
5613 else
5614 iv_ca_delta_free (&act_delta);
5615 }
5617 if (!best_delta)
5618 {
5619 /* Try removing the candidates from the set instead. */
5620 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5622 /* Nothing more we can do. */
5623 if (!best_delta)
5624 return false;
5625 }
5627 iv_ca_delta_commit (data, ivs, best_delta, true);
5628 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5629 iv_ca_delta_free (&best_delta);
5630 return true;
5631 }
5633 /* Attempts to find the optimal set of induction variables. We use a simple
5634 greedy heuristic: we try to replace at most one candidate in the selected
5635 solution and remove the unused ivs while this improves the cost. */
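/* In outline, find_optimal_iv_set_1 below performs

     set = get_initial_solution (data, originalp);
     while (try_improve_iv_set (data, set))
       continue;
     return set;

   i.e. a local search that stops as soon as no single extension or
   pruning step lowers the cost.  */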
5637 static struct iv_ca *
5638 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5639 {
5640 struct iv_ca *set;
5642 /* Get the initial solution. */
5643 set = get_initial_solution (data, originalp);
5644 if (!set)
5645 {
5646 if (dump_file && (dump_flags & TDF_DETAILS))
5647 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5648 return NULL;
5649 }
5651 if (dump_file && (dump_flags & TDF_DETAILS))
5653 fprintf (dump_file, "Initial set of candidates:\n");
5654 iv_ca_dump (data, dump_file, set);
5657 while (try_improve_iv_set (data, set))
5659 if (dump_file && (dump_flags & TDF_DETAILS))
5661 fprintf (dump_file, "Improved to:\n");
5662 iv_ca_dump (data, dump_file, set);
5663 }
5664 }
5666 return set;
5667 }
5669 static struct iv_ca *
5670 find_optimal_iv_set (struct ivopts_data *data)
5671 {
5672 unsigned i;
5673 struct iv_ca *set, *origset;
5674 struct iv_use *use;
5675 comp_cost cost, origcost;
5677 /* Determine the cost based on a strategy that starts with original IVs,
5678 and try again using a strategy that prefers candidates not based
5680 origset = find_optimal_iv_set_1 (data, true);
5681 set = find_optimal_iv_set_1 (data, false);
5683 if (!origset && !set)
5684 return NULL;
5686 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5687 cost = set ? iv_ca_cost (set) : infinite_cost;
5689 if (dump_file && (dump_flags & TDF_DETAILS))
5691 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5692 origcost.cost, origcost.complexity);
5693 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5694 cost.cost, cost.complexity);
5697 /* Choose the one with the best cost. */
5698 if (compare_costs (origcost, cost) <= 0)
5699 {
5700 if (set)
5701 iv_ca_free (&set);
5702 set = origset;
5703 }
5704 else if (origset)
5705 iv_ca_free (&origset);
5707 for (i = 0; i < n_iv_uses (data); i++)
5709 use = iv_use (data, i);
5710 use->selected = iv_ca_cand_for_use (set, use)->cand;
5711 }
5713 return set;
5714 }
5716 /* Creates a new induction variable corresponding to CAND. */
5718 static void
5719 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5720 {
5721 gimple_stmt_iterator incr_pos;
5722 tree base;
5723 bool after = false;
5725 if (!cand->iv)
5726 return;
5728 switch (cand->pos)
5729 {
5730 case IP_NORMAL:
5731 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5732 break;
5734 case IP_END:
5735 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5736 after = true;
5737 break;
5739 case IP_AFTER_USE:
5740 after = true;
5741 /* fall through */
5742 case IP_BEFORE_USE:
5743 incr_pos = gsi_for_stmt (cand->incremented_at);
5744 break;
5746 case IP_ORIGINAL:
5747 /* Mark that the iv is preserved. */
5748 name_info (data, cand->var_before)->preserve_biv = true;
5749 name_info (data, cand->var_after)->preserve_biv = true;
5751 /* Rewrite the increment so that it uses var_before directly. */
5752 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5753 return;
5754 }
5756 gimple_add_tmp_var (cand->var_before);
5757 add_referenced_var (cand->var_before);
5759 base = unshare_expr (cand->iv->base);
5761 create_iv (base, unshare_expr (cand->iv->step),
5762 cand->var_before, data->current_loop,
5763 &incr_pos, after, &cand->var_before, &cand->var_after);
5764 }
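/* For a candidate with base B and step S placed at IP_NORMAL, create_iv
   emits the equivalent of

     var_before = PHI <B (preheader), var_after (latch)>;
     var_after = var_before + S;

   with the increment inserted just before the exit test of the loop.  */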
5766 /* Creates new induction variables described in SET. */
5768 static void
5769 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5770 {
5771 unsigned i;
5772 struct iv_cand *cand;
5773 bitmap_iterator bi;
5775 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5777 cand = iv_cand (data, i);
5778 create_new_iv (data, cand);
5779 }
5781 if (dump_file && (dump_flags & TDF_DETAILS))
5783 fprintf (dump_file, "\nSelected IV set: \n");
5784 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5786 cand = iv_cand (data, i);
5787 dump_cand (dump_file, cand);
5789 fprintf (dump_file, "\n");
5793 /* Rewrites USE (definition of iv used in a nonlinear expression)
5794 using candidate CAND. */
5796 static void
5797 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5798 struct iv_use *use, struct iv_cand *cand)
5799 {
5800 tree comp;
5801 tree op, tgt;
5802 gimple ass;
5803 gimple_stmt_iterator bsi;
5805 /* An important special case -- if we are asked to express value of
5806 the original iv by itself, just exit; there is no need to
5807 introduce a new computation (that might also need casting the
5808 variable to unsigned and back). */
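/* I.e., if USE->stmt is the increment i_2 = i_1 + 1 of the original biv
   and CAND is that original iv, the statement is already the cheapest
   possible expression of the use.  */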
5809 if (cand->pos == IP_ORIGINAL
5810 && cand->incremented_at == use->stmt)
5812 tree step, ctype, utype;
5813 enum tree_code incr_code = PLUS_EXPR, old_code;
5815 gcc_assert (is_gimple_assign (use->stmt));
5816 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5818 step = cand->iv->step;
5819 ctype = TREE_TYPE (step);
5820 utype = TREE_TYPE (cand->var_after);
5821 if (TREE_CODE (step) == NEGATE_EXPR)
5823 incr_code = MINUS_EXPR;
5824 step = TREE_OPERAND (step, 0);
5827 /* Check whether we may leave the computation unchanged.
5828 This is the case only if it does not rely on other
5829 computations in the loop -- otherwise, the computation
5830 we rely upon may be removed in remove_unused_ivs,
5831 thus leading to ICE. */
5832 old_code = gimple_assign_rhs_code (use->stmt);
5833 if (old_code == PLUS_EXPR
5834 || old_code == MINUS_EXPR
5835 || old_code == POINTER_PLUS_EXPR)
5837 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5838 op = gimple_assign_rhs2 (use->stmt);
5839 else if (old_code != MINUS_EXPR
5840 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5841 op = gimple_assign_rhs1 (use->stmt);
5842 else
5843 op = NULL_TREE;
5844 }
5845 else
5846 op = NULL_TREE;
5848 if (op
5849 && (TREE_CODE (op) == INTEGER_CST
5850 || operand_equal_p (op, step, 0)))
5851 return;
5853 /* Otherwise, add the necessary computations to express
5854 the iv. */
5855 op = fold_convert (ctype, cand->var_before);
5856 comp = fold_convert (utype,
5857 build2 (incr_code, ctype, op,
5858 unshare_expr (step)));
5859 }
5860 else
5861 {
5862 comp = get_computation (data->current_loop, use, cand);
5863 gcc_assert (comp != NULL_TREE);
5864 }
5866 switch (gimple_code (use->stmt))
5867 {
5868 case GIMPLE_PHI:
5869 tgt = PHI_RESULT (use->stmt);
5871 /* If we should keep the biv, do not replace it. */
5872 if (name_info (data, tgt)->preserve_biv)
5873 return;
5875 bsi = gsi_after_labels (gimple_bb (use->stmt));
5876 break;
5878 case GIMPLE_ASSIGN:
5879 tgt = gimple_assign_lhs (use->stmt);
5880 bsi = gsi_for_stmt (use->stmt);
5881 break;
5883 default:
5884 gcc_unreachable ();
5885 }
5887 if (!valid_gimple_rhs_p (comp)
5888 || (gimple_code (use->stmt) != GIMPLE_PHI
5889 /* We can't allow re-allocating the stmt as it might be pointed
5890 to still. */
5891 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5892 >= gimple_num_ops (gsi_stmt (bsi)))))
5894 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5895 true, GSI_SAME_STMT);
5896 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5898 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5899 /* As this isn't a plain copy we have to reset alignment
5900 information. */
5901 if (SSA_NAME_PTR_INFO (comp))
5903 SSA_NAME_PTR_INFO (comp)->align = 1;
5904 SSA_NAME_PTR_INFO (comp)->misalign = 0;
5905 }
5906 }
5907 }
5909 if (gimple_code (use->stmt) == GIMPLE_PHI)
5911 ass = gimple_build_assign (tgt, comp);
5912 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5914 bsi = gsi_for_stmt (use->stmt);
5915 remove_phi_node (&bsi, false);
5916 }
5917 else
5918 {
5919 gimple_assign_set_rhs_from_tree (&bsi, comp);
5920 use->stmt = gsi_stmt (bsi);
5921 }
5922 }
5924 /* Copies the reference information from OLD_REF to NEW_REF. */
5926 static void
5927 copy_ref_info (tree new_ref, tree old_ref)
5928 {
5929 tree new_ptr_base = NULL_TREE;
5931 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5932 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5934 new_ptr_base = TREE_OPERAND (new_ref, 0);
5936 /* We can transfer points-to information from an old pointer
5937 or decl base to the new one. */
5938 if (new_ptr_base
5939 && TREE_CODE (new_ptr_base) == SSA_NAME
5940 && !SSA_NAME_PTR_INFO (new_ptr_base))
5942 tree base = get_base_address (old_ref);
5943 if (!base)
5944 ;
5945 else if ((TREE_CODE (base) == MEM_REF
5946 || TREE_CODE (base) == TARGET_MEM_REF)
5947 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
5948 && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
5950 struct ptr_info_def *new_pi;
5951 duplicate_ssa_name_ptr_info
5952 (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5953 new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
5954 /* We have to be careful about transferring alignment information. */
5955 if (TREE_CODE (old_ref) == MEM_REF
5956 && !(TREE_CODE (new_ref) == TARGET_MEM_REF
5957 && (TMR_INDEX2 (new_ref)
5958 || (TMR_STEP (new_ref)
5959 && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
5960 < new_pi->align)))))
5962 new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
5963 mem_ref_offset (new_ref)).low;
5964 new_pi->misalign &= (new_pi->align - 1);
5965 }
5966 else
5967 {
5968 new_pi->align = 1;
5969 new_pi->misalign = 0;
5970 }
5971 }
5972 else if (TREE_CODE (base) == VAR_DECL
5973 || TREE_CODE (base) == PARM_DECL
5974 || TREE_CODE (base) == RESULT_DECL)
5976 struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5977 pt_solution_set_var (&pi->pt, base);
5978 }
5979 }
5980 }
5982 /* Performs a peephole optimization to reorder the iv update statement with
5983 a mem ref to enable instruction combining in later phases. The mem ref uses
5984 the iv value before the update, so the reordering transformation requires
5985 adjustment of the offset. CAND is the selected IV_CAND.
5987 Example:
5989 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
5990 iv2 = iv1 + 1;
5992 if (t < val) (1)
5993 goto L;
5994 goto Head;
5997 Directly propagating t over to (1) will introduce an overlapping live
5998 range and thus increase register pressure. This peephole transforms it into:
6001 iv2 = iv1 + 1;
6002 t = MEM_REF (base, iv2, 8, 8);
6003 if (t < val)
6004 goto L;
6005 goto Head;
6006 */
6008 static void
6009 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6010 {
6011 tree var_after;
6012 gimple iv_update, stmt;
6013 basic_block bb;
6014 gimple_stmt_iterator gsi, gsi_iv;
6016 if (cand->pos != IP_NORMAL)
6017 return;
6019 var_after = cand->var_after;
6020 iv_update = SSA_NAME_DEF_STMT (var_after);
6022 bb = gimple_bb (iv_update);
6023 gsi = gsi_last_nondebug_bb (bb);
6024 stmt = gsi_stmt (gsi);
6026 /* Only handle conditional statements for now. */
6027 if (gimple_code (stmt) != GIMPLE_COND)
6028 return;
6030 gsi_prev_nondebug (&gsi);
6031 stmt = gsi_stmt (gsi);
6032 if (stmt != iv_update)
6033 return;
6035 gsi_prev_nondebug (&gsi);
6036 if (gsi_end_p (gsi))
6037 return;
6039 stmt = gsi_stmt (gsi);
6040 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6041 return;
6043 if (stmt != use->stmt)
6044 return;
6046 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6047 return;
6049 if (dump_file && (dump_flags & TDF_DETAILS))
6051 fprintf (dump_file, "Reordering \n");
6052 print_gimple_stmt (dump_file, iv_update, 0, 0);
6053 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6054 fprintf (dump_file, "\n");
6057 gsi = gsi_for_stmt (use->stmt);
6058 gsi_iv = gsi_for_stmt (iv_update);
6059 gsi_move_before (&gsi_iv, &gsi);
6061 cand->pos = IP_BEFORE_USE;
6062 cand->incremented_at = use->stmt;
6063 }
6065 /* Rewrites USE (address that is an iv) using candidate CAND. */
6067 static void
6068 rewrite_use_address (struct ivopts_data *data,
6069 struct iv_use *use, struct iv_cand *cand)
6070 {
6071 aff_tree aff;
6072 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6073 tree base_hint = NULL_TREE;
6074 tree ref, iv;
6075 bool ok;
6077 adjust_iv_update_pos (cand, use);
6078 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6079 gcc_assert (ok);
6080 unshare_aff_combination (&aff);
6082 /* To avoid undefined overflow problems, all IV candidates use unsigned
6083 integer types. The drawback is that this makes it impossible for
6084 create_mem_ref to distinguish an IV that is based on a memory object
6085 from one that represents simply an offset.
6087 To work around this problem, we pass a hint to create_mem_ref that
6088 indicates which variable (if any) in aff is an IV based on a memory
6089 object. Note that we only consider the candidate. If this is not
6090 based on an object, the base of the reference is in some subexpression
6091 of the use -- but these will use pointer types, so they are recognized
6092 by the create_mem_ref heuristics anyway. */
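/* E.g., for a use *p_1 with p_1 = b_2 + 4 * i_3 and a candidate based on
   the memory object behind b_2, the hint lets create_mem_ref build
   MEM[base: b_2, index: i_3, step: 4] instead of folding b_2 into a
   symbolic offset.  */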
6093 if (cand->iv->base_object)
6094 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6096 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6097 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6098 reference_alias_ptr_type (*use->op_p),
6099 iv, base_hint, data->speed);
6100 copy_ref_info (ref, *use->op_p);
6101 *use->op_p = ref;
6102 }
6104 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6105 candidate CAND. */
6107 static void
6108 rewrite_use_compare (struct ivopts_data *data,
6109 struct iv_use *use, struct iv_cand *cand)
6111 tree comp, *var_p, op, bound;
6112 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6113 enum tree_code compare;
6114 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6115 bool ok;
6117 bound = cp->value;
6118 if (bound)
6119 {
6120 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6121 tree var_type = TREE_TYPE (var);
6122 gimple_seq stmts;
6124 if (dump_file && (dump_flags & TDF_DETAILS))
6126 fprintf (dump_file, "Replacing exit test: ");
6127 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6128 }
6129 compare = iv_elimination_compare (data, use);
6130 bound = unshare_expr (fold_convert (var_type, bound));
6131 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6132 if (stmts)
6133 gsi_insert_seq_on_edge_immediate (
6134 loop_preheader_edge (data->current_loop),
6135 stmts);
6137 gimple_cond_set_lhs (use->stmt, var);
6138 gimple_cond_set_code (use->stmt, compare);
6139 gimple_cond_set_rhs (use->stmt, op);
6140 return;
6141 }
6143 /* The induction variable elimination failed; just express the original
6145 comp = get_computation (data->current_loop, use, cand);
6146 gcc_assert (comp != NULL_TREE);
6148 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6149 gcc_assert (ok);
6151 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6152 true, GSI_SAME_STMT);
6153 }
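/* For example, when the number of iterations is known, an exit test

     if (i_1 < n_2)

   over an eliminable biv may be replaced by a test of the selected
   candidate against the precomputed BOUND, say

     if (p_3 != base_4 + 400B)

   with the bound computation inserted on the preheader edge above.  */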
6155 /* Rewrites USE using candidate CAND. */
6158 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6159 {
6160 switch (use->type)
6161 {
6162 case USE_NONLINEAR_EXPR:
6163 rewrite_use_nonlinear_expr (data, use, cand);
6164 break;
6166 case USE_ADDRESS:
6167 rewrite_use_address (data, use, cand);
6168 break;
6170 case USE_COMPARE:
6171 rewrite_use_compare (data, use, cand);
6172 break;
6174 default:
6175 gcc_unreachable ();
6176 }
6178 update_stmt (use->stmt);
6179 }
6181 /* Rewrite the uses using the selected induction variables. */
6183 static void
6184 rewrite_uses (struct ivopts_data *data)
6185 {
6187 struct iv_cand *cand;
6188 struct iv_use *use;
6189 unsigned i;
6190 for (i = 0; i < n_iv_uses (data); i++)
6192 use = iv_use (data, i);
6193 cand = use->selected;
6194 gcc_assert (cand);
6196 rewrite_use (data, use, cand);
6197 }
6198 }
6200 /* Removes the ivs that are not used after rewriting. */
6202 static void
6203 remove_unused_ivs (struct ivopts_data *data)
6204 {
6205 unsigned j;
6206 bitmap_iterator bi;
6207 bitmap toremove = BITMAP_ALLOC (NULL);
6209 /* Figure out an order in which to release SSA DEFs so that we don't
6210 release something that we'd have to propagate into a debug stmt
6211 afterwards. */
6212 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6214 struct version_info *info;
6216 info = ver_info (data, j);
6217 if (info->iv
6218 && !integer_zerop (info->iv->step)
6219 && !info->inv_id
6220 && !info->iv->have_use_for
6221 && !info->preserve_biv)
6222 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6223 }
6225 release_defs_bitset (toremove);
6227 BITMAP_FREE (toremove);
6228 }
6230 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6231 for pointer_map_traverse. */
6233 static bool
6234 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6235 void *data ATTRIBUTE_UNUSED)
6236 {
6237 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6239 free (niter);
6240 return true;
6241 }
6243 /* Frees data allocated by the optimization of a single loop. */
6245 static void
6246 free_loop_data (struct ivopts_data *data)
6247 {
6248 unsigned i, j;
6249 bitmap_iterator bi;
6250 tree obj;
6252 if (data->niters)
6253 {
6254 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6255 pointer_map_destroy (data->niters);
6256 data->niters = NULL;
6257 }
6259 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6261 struct version_info *info;
6263 info = ver_info (data, i);
6264 free (info->iv);
6265 info->iv = NULL;
6266 info->has_nonlin_use = false;
6267 info->preserve_biv = false;
6268 info->inv_id = 0;
6269 }
6270 bitmap_clear (data->relevant);
6271 bitmap_clear (data->important_candidates);
6273 for (i = 0; i < n_iv_uses (data); i++)
6275 struct iv_use *use = iv_use (data, i);
6277 free (use->iv);
6278 BITMAP_FREE (use->related_cands);
6279 for (j = 0; j < use->n_map_members; j++)
6280 if (use->cost_map[j].depends_on)
6281 BITMAP_FREE (use->cost_map[j].depends_on);
6282 free (use->cost_map);
6283 free (use);
6284 }
6285 VEC_truncate (iv_use_p, data->iv_uses, 0);
6287 for (i = 0; i < n_iv_cands (data); i++)
6289 struct iv_cand *cand = iv_cand (data, i);
6291 free (cand->iv);
6292 if (cand->depends_on)
6293 BITMAP_FREE (cand->depends_on);
6294 free (cand);
6295 }
6296 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6298 if (data->version_info_size < num_ssa_names)
6300 data->version_info_size = 2 * num_ssa_names;
6301 free (data->version_info);
6302 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6303 }
6305 data->max_inv_id = 0;
6307 FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6308 SET_DECL_RTL (obj, NULL_RTX);
6310 VEC_truncate (tree, decl_rtl_to_reset, 0);
6312 htab_empty (data->inv_expr_tab);
6313 data->inv_expr_id = 0;
6314 }
6316 /* Finalizes data structures used by the iv optimization pass. */
6319 static void
6320 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6322 free_loop_data (data);
6323 free (data->version_info);
6324 BITMAP_FREE (data->relevant);
6325 BITMAP_FREE (data->important_candidates);
6327 VEC_free (tree, heap, decl_rtl_to_reset);
6328 VEC_free (iv_use_p, heap, data->iv_uses);
6329 VEC_free (iv_cand_p, heap, data->iv_candidates);
6330 htab_delete (data->inv_expr_tab);
6331 }
6333 /* Returns true if the loop body BODY includes any function calls. */
6335 static bool
6336 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6337 {
6338 gimple_stmt_iterator gsi;
6339 unsigned i;
6341 for (i = 0; i < num_nodes; i++)
6342 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6344 gimple stmt = gsi_stmt (gsi);
6345 if (is_gimple_call (stmt)
6346 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6347 return true;
6348 }
6350 return false;
6351 }
6352 /* Optimizes the LOOP. Returns true if anything changed. */
6354 static bool
6355 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6356 {
6357 bool changed = false;
6358 struct iv_ca *iv_ca;
6359 edge exit;
6360 basic_block *body;
6362 gcc_assert (!data->niters);
6363 data->current_loop = loop;
6364 data->speed = optimize_loop_for_speed_p (loop);
6366 if (dump_file && (dump_flags & TDF_DETAILS))
6368 fprintf (dump_file, "Processing loop %d\n", loop->num);
6370 exit = single_dom_exit (loop);
6371 if (exit)
6372 {
6373 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6374 exit->src->index, exit->dest->index);
6375 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6376 fprintf (dump_file, "\n");
6377 }
6379 fprintf (dump_file, "\n");
6380 }
6382 body = get_loop_body (loop);
6383 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6384 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6385 free (body);
6387 /* For each ssa name, determine whether it behaves as an induction variable
6388 in some loop. */
6389 if (!find_induction_variables (data))
6390 goto finish;
6392 /* Finds interesting uses (item 1). */
6393 find_interesting_uses (data);
6394 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6395 goto finish;
6397 /* Finds candidates for the induction variables (item 2). */
6398 find_iv_candidates (data);
6400 /* Calculates the costs (item 3, part 1). */
6401 determine_iv_costs (data);
6402 determine_use_iv_costs (data);
6403 determine_set_costs (data);
6405 /* Find the optimal set of induction variables (item 3, part 2). */
6406 iv_ca = find_optimal_iv_set (data);
6407 if (!iv_ca)
6408 goto finish;
6409 changed = true;
6411 /* Create the new induction variables (item 4, part 1). */
6412 create_new_ivs (data, iv_ca);
6413 iv_ca_free (&iv_ca);
6415 /* Rewrite the uses (item 4, part 2). */
6416 rewrite_uses (data);
6418 /* Remove the ivs that are unused after rewriting. */
6419 remove_unused_ivs (data);
6421 /* We have changed the structure of induction variables; it might happen
6422 that definitions in the scev database refer to some of them that were
6423 eliminated. */
6424 scev_reset ();
6426 finish:
6427 free_loop_data (data);
6429 return changed;
6430 }
6432 /* Main entry point. Optimizes induction variables in loops. */
6434 void
6435 tree_ssa_iv_optimize (void)
6436 {
6437 struct loop *loop;
6438 struct ivopts_data data;
6439 loop_iterator li;
6441 tree_ssa_iv_optimize_init (&data);
6443 /* Optimize the loops starting with the innermost ones. */
6444 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6446 if (dump_file && (dump_flags & TDF_DETAILS))
6447 flow_loop_dump (loop, dump_file, NULL, 1);
6449 tree_ssa_iv_optimize_loop (&data, loop);
6450 }
6452 tree_ssa_iv_optimize_finalize (&data);
6453 }