gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "tm_p.h"
  71 #include "basic-block.h"
  72 #include "output.h"
  73 #include "tree-pretty-print.h"
  74 #include "gimple-pretty-print.h"
  75 #include "tree-flow.h"
  76 #include "tree-dump.h"
  77 #include "timevar.h"
  78 #include "cfgloop.h"
  79 #include "tree-pass.h"
  80 #include "ggc.h"
  81 #include "insn-config.h"
  82 #include "recog.h"
  83 #include "pointer-set.h"
  84 #include "hashtab.h"
  85 #include "tree-chrec.h"
  86 #include "tree-scalar-evolution.h"
  87 #include "cfgloop.h"
  88 #include "params.h"
  89 #include "langhooks.h"
  90 #include "tree-affine.h"
  91 #include "target.h"
  92 #include "tree-inline.h"
  93 #include "tree-ssa-propagate.h"
  94
  95 /* FIXME: Expressions are expanded to RTL in this pass to determine the
  96    cost of different addressing modes.  This should be moved to a TBD
  97    interface between the GIMPLE and RTL worlds.  */
  98 #include "expr.h"
  99
 100 /* The infinite cost.  */
 101 #define INFTY 10000000
 102
 103 #define AVG_LOOP_NITER(LOOP) 5
 104
 105 /* Returns the expected number of loop iterations for LOOP.
 106    The average trip count is computed from profile data if it
 107    exists. */
 108
 109 static inline HOST_WIDE_INT
 110 avg_loop_niter (struct loop *loop)
 111 {
 112   HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
 113   if (niter == -1)
 114     return AVG_LOOP_NITER (loop);
 115
 116   return niter;
 117 }
 118
 119 /* Representation of the induction variable.  */
 120 struct iv
 121 {
 122   tree base;            /* Initial value of the iv.  */
 123   tree base_object;     /* A memory object to that the induction variable points.  */
 124   tree step;            /* Step of the iv (constant only).  */
 125   tree ssa_name;        /* The ssa name with the value.  */
 126   bool biv_p;           /* Is it a biv?  */
 127   bool have_use_for;    /* Do we already have a use for it?  */
 128   unsigned use_id;      /* The identifier in the use if it is the case.  */
 129 };
 130
 131 /* Per-ssa version information (induction variable descriptions, etc.).  */
 132 struct version_info
 133 {
 134   tree name;            /* The ssa name.  */
 135   struct iv *iv;        /* Induction variable description.  */
 136   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 137                            an expression that is not an induction variable.  */
 138   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 139   unsigned inv_id;      /* Id of an invariant.  */
 140 };
 141
 142 /* Types of uses.  */
 143 enum use_type
 144 {
 145   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 146   USE_ADDRESS,          /* Use in an address.  */
 147   USE_COMPARE           /* Use is a compare.  */
 148 };
 149
 150 /* Cost of a computation.  */
 151 typedef struct
 152 {
 153   int cost;             /* The runtime cost.  */
 154   unsigned complexity;  /* The estimate of the complexity of the code for
 155                            the computation (in no concrete units --
 156                            complexity field should be larger for more
 157                            complex expressions and addressing modes).  */
 158 } comp_cost;
 159
 160 static const comp_cost zero_cost = {0, 0};
 161 static const comp_cost infinite_cost = {INFTY, INFTY};
 162
 163 /* The candidate - cost pair.  */
 164 struct cost_pair
 165 {
 166   struct iv_cand *cand; /* The candidate.  */
 167   comp_cost cost;       /* The cost.  */
 168   bitmap depends_on;    /* The list of invariants that have to be
 169                            preserved.  */
 170   tree value;           /* For final value elimination, the expression for
 171                            the final value of the iv.  For iv elimination,
 172                            the new bound to compare with.  */
 173   int inv_expr_id;      /* Loop invariant expression id.  */
 174 };
 175
 176 /* Use.  */
 177 struct iv_use
 178 {
 179   unsigned id;          /* The id of the use.  */
 180   enum use_type type;   /* Type of the use.  */
 181   struct iv *iv;        /* The induction variable it is based on.  */
 182   gimple stmt;          /* Statement in that it occurs.  */
 183   tree *op_p;           /* The place where it occurs.  */
 184   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 185                            important ones.  */
 186
 187   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 188   struct cost_pair *cost_map;
 189                         /* The costs wrto the iv candidates.  */
 190
 191   struct iv_cand *selected;
 192                         /* The selected candidate.  */
 193 };
 194
 195 /* The position where the iv is computed.  */
 196 enum iv_position
 197 {
 198   IP_NORMAL,            /* At the end, just before the exit condition.  */
 199   IP_END,               /* At the end of the latch block.  */
 200   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 201   IP_AFTER_USE,         /* Immediately after a specific use.  */
 202   IP_ORIGINAL           /* The original biv.  */
 203 };
 204
 205 /* The induction variable candidate.  */
 206 struct iv_cand
 207 {
 208   unsigned id;          /* The number of the candidate.  */
 209   bool important;       /* Whether this is an "important" candidate, i.e. such
 210                            that it should be considered by all uses.  */
 211   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 212   gimple incremented_at;/* For original biv, the statement where it is
 213                            incremented.  */
 214   tree var_before;      /* The variable used for it before increment.  */
 215   tree var_after;       /* The variable used for it after increment.  */
 216   struct iv *iv;        /* The value of the candidate.  NULL for
 217                            "pseudocandidate" used to indicate the possibility
 218                            to replace the final value of an iv by direct
 219                            computation of the value.  */
 220   unsigned cost;        /* Cost of the candidate.  */
 221   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 222   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 223                               where it is incremented.  */
 224   bitmap depends_on;    /* The list of invariants that are used in step of the
 225                            biv.  */
 226 };
 227
 228 /* Loop invariant expression hashtable entry.  */
 229 struct iv_inv_expr_ent
 230 {
 231   tree expr;
 232   int id;
 233   hashval_t hash;
 234 };
 235
 236 /* The data used by the induction variable optimizations.  */
 237
 238 typedef struct iv_use *iv_use_p;
 239 DEF_VEC_P(iv_use_p);
 240 DEF_VEC_ALLOC_P(iv_use_p,heap);
 241
 242 typedef struct iv_cand *iv_cand_p;
 243 DEF_VEC_P(iv_cand_p);
 244 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 245
 246 struct ivopts_data
 247 {
 248   /* The currently optimized loop.  */
 249   struct loop *current_loop;
 250
 251   /* Numbers of iterations for all exits of the current loop.  */
 252   struct pointer_map_t *niters;
 253
 254   /* Number of registers used in it.  */
 255   unsigned regs_used;
 256
 257   /* The size of version_info array allocated.  */
 258   unsigned version_info_size;
 259
 260   /* The array of information for the ssa names.  */
 261   struct version_info *version_info;
 262
 263   /* The hashtable of loop invariant expressions created
 264      by ivopt.  */
 265   htab_t inv_expr_tab;
 266
 267   /* Loop invariant expression id.  */
 268   int inv_expr_id;
 269
 270   /* The bitmap of indices in version_info whose value was changed.  */
 271   bitmap relevant;
 272
 273   /* The uses of induction variables.  */
 274   VEC(iv_use_p,heap) *iv_uses;
 275
 276   /* The candidates.  */
 277   VEC(iv_cand_p,heap) *iv_candidates;
 278
 279   /* A bitmap of important candidates.  */
 280   bitmap important_candidates;
 281
 282   /* The maximum invariant id.  */
 283   unsigned max_inv_id;
 284
 285   /* Whether to consider just related and important candidates when replacing a
 286      use.  */
 287   bool consider_all_candidates;
 288
 289   /* Are we optimizing for speed?  */
 290   bool speed;
 291
 292   /* Whether the loop body includes any function calls.  */
 293   bool body_includes_call;
 294 };
 295
 296 /* An assignment of iv candidates to uses.  */
 297
 298 struct iv_ca
 299 {
 300   /* The number of uses covered by the assignment.  */
 301   unsigned upto;
 302
 303   /* Number of uses that cannot be expressed by the candidates in the set.  */
 304   unsigned bad_uses;
 305
 306   /* Candidate assigned to a use, together with the related costs.  */
 307   struct cost_pair **cand_for_use;
 308
 309   /* Number of times each candidate is used.  */
 310   unsigned *n_cand_uses;
 311
 312   /* The candidates used.  */
 313   bitmap cands;
 314
 315   /* The number of candidates in the set.  */
 316   unsigned n_cands;
 317
 318   /* Total number of registers needed.  */
 319   unsigned n_regs;
 320
 321   /* Total cost of expressing uses.  */
 322   comp_cost cand_use_cost;
 323
 324   /* Total cost of candidates.  */
 325   unsigned cand_cost;
 326
 327   /* Number of times each invariant is used.  */
 328   unsigned *n_invariant_uses;
 329
 330   /* The array holding the number of uses of each loop
 331      invariant expressions created by ivopt.  */
 332   unsigned *used_inv_expr;
 333
 334   /* The number of created loop invariants.  */
 335   unsigned num_used_inv_expr;
 336
 337   /* Total cost of the assignment.  */
 338   comp_cost cost;
 339 };
 340
 341 /* Difference of two iv candidate assignments.  */
 342
 343 struct iv_ca_delta
 344 {
 345   /* Changed use.  */
 346   struct iv_use *use;
 347
 348   /* An old assignment (for rollback purposes).  */
 349   struct cost_pair *old_cp;
 350
 351   /* A new assignment.  */
 352   struct cost_pair *new_cp;
 353
 354   /* Next change in the list.  */
 355   struct iv_ca_delta *next_change;
 356 };
 357
 358 /* Bound on number of candidates below that all candidates are considered.  */
 359
 360 #define CONSIDER_ALL_CANDIDATES_BOUND \
 361   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 362
 363 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 364    optimizing such a loop would help, and it would take ages).  */
 365
 366 #define MAX_CONSIDERED_USES \
 367   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 368
 369 /* If there are at most this number of ivs in the set, try removing unnecessary
 370    ivs from the set always.  */
 371
 372 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 373   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 374
 375 /* The list of trees for that the decl_rtl field must be reset is stored
 376    here.  */
 377
 378 static VEC(tree,heap) *decl_rtl_to_reset;
 379
 380 /* Number of uses recorded in DATA.  */
 381
 382 static inline unsigned
 383 n_iv_uses (struct ivopts_data *data)
 384 {
 385   return VEC_length (iv_use_p, data->iv_uses);
 386 }
 387
 388 /* Ith use recorded in DATA.  */
 389
 390 static inline struct iv_use *
 391 iv_use (struct ivopts_data *data, unsigned i)
 392 {
 393   return VEC_index (iv_use_p, data->iv_uses, i);
 394 }
 395
 396 /* Number of candidates recorded in DATA.  */
 397
 398 static inline unsigned
 399 n_iv_cands (struct ivopts_data *data)
 400 {
 401   return VEC_length (iv_cand_p, data->iv_candidates);
 402 }
 403
 404 /* Ith candidate recorded in DATA.  */
 405
 406 static inline struct iv_cand *
 407 iv_cand (struct ivopts_data *data, unsigned i)
 408 {
 409   return VEC_index (iv_cand_p, data->iv_candidates, i);
 410 }
 411
 412 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 413
 414 edge
 415 single_dom_exit (struct loop *loop)
 416 {
 417   edge exit = single_exit (loop);
 418
 419   if (!exit)
 420     return NULL;
 421
 422   if (!just_once_each_iteration_p (loop, exit->src))
 423     return NULL;
 424
 425   return exit;
 426 }
 427
 428 /* Dumps information about the induction variable IV to FILE.  */
 429
 430 extern void dump_iv (FILE *, struct iv *);
 431 void
 432 dump_iv (FILE *file, struct iv *iv)
 433 {
 434   if (iv->ssa_name)
 435     {
 436       fprintf (file, "ssa name ");
 437       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 438       fprintf (file, "\n");
 439     }
 440
 441   fprintf (file, "  type ");
 442   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 443   fprintf (file, "\n");
 444
 445   if (iv->step)
 446     {
 447       fprintf (file, "  base ");
 448       print_generic_expr (file, iv->base, TDF_SLIM);
 449       fprintf (file, "\n");
 450
 451       fprintf (file, "  step ");
 452       print_generic_expr (file, iv->step, TDF_SLIM);
 453       fprintf (file, "\n");
 454     }
 455   else
 456     {
 457       fprintf (file, "  invariant ");
 458       print_generic_expr (file, iv->base, TDF_SLIM);
 459       fprintf (file, "\n");
 460     }
 461
 462   if (iv->base_object)
 463     {
 464       fprintf (file, "  base object ");
 465       print_generic_expr (file, iv->base_object, TDF_SLIM);
 466       fprintf (file, "\n");
 467     }
 468
 469   if (iv->biv_p)
 470     fprintf (file, "  is a biv\n");
 471 }
 472
 473 /* Dumps information about the USE to FILE.  */
 474
 475 extern void dump_use (FILE *, struct iv_use *);
 476 void
 477 dump_use (FILE *file, struct iv_use *use)
 478 {
 479   fprintf (file, "use %d\n", use->id);
 480
 481   switch (use->type)
 482     {
 483     case USE_NONLINEAR_EXPR:
 484       fprintf (file, "  generic\n");
 485       break;
 486
 487     case USE_ADDRESS:
 488       fprintf (file, "  address\n");
 489       break;
 490
 491     case USE_COMPARE:
 492       fprintf (file, "  compare\n");
 493       break;
 494
 495     default:
 496       gcc_unreachable ();
 497     }
 498
 499   fprintf (file, "  in statement ");
 500   print_gimple_stmt (file, use->stmt, 0, 0);
 501   fprintf (file, "\n");
 502
 503   fprintf (file, "  at position ");
 504   if (use->op_p)
 505     print_generic_expr (file, *use->op_p, TDF_SLIM);
 506   fprintf (file, "\n");
 507
 508   dump_iv (file, use->iv);
 509
 510   if (use->related_cands)
 511     {
 512       fprintf (file, "  related candidates ");
 513       dump_bitmap (file, use->related_cands);
 514     }
 515 }
 516
 517 /* Dumps information about the uses to FILE.  */
 518
 519 extern void dump_uses (FILE *, struct ivopts_data *);
 520 void
 521 dump_uses (FILE *file, struct ivopts_data *data)
 522 {
 523   unsigned i;
 524   struct iv_use *use;
 525
 526   for (i = 0; i < n_iv_uses (data); i++)
 527     {
 528       use = iv_use (data, i);
 529
 530       dump_use (file, use);
 531       fprintf (file, "\n");
 532     }
 533 }
 534
 535 /* Dumps information about induction variable candidate CAND to FILE.  */
 536
 537 extern void dump_cand (FILE *, struct iv_cand *);
 538 void
 539 dump_cand (FILE *file, struct iv_cand *cand)
 540 {
 541   struct iv *iv = cand->iv;
 542
 543   fprintf (file, "candidate %d%s\n",
 544            cand->id, cand->important ? " (important)" : "");
 545
 546   if (cand->depends_on)
 547     {
 548       fprintf (file, "  depends on ");
 549       dump_bitmap (file, cand->depends_on);
 550     }
 551
 552   if (!iv)
 553     {
 554       fprintf (file, "  final value replacement\n");
 555       return;
 556     }
 557
 558   if (cand->var_before)
 559     {
 560       fprintf (file, "  var_before ");
 561       print_generic_expr (file, cand->var_before, TDF_SLIM);
 562       fprintf (file, "\n");
 563     }
 564   if (cand->var_after)
 565     {
 566       fprintf (file, "  var_after ");
 567       print_generic_expr (file, cand->var_after, TDF_SLIM);
 568       fprintf (file, "\n");
 569     }
 570
 571   switch (cand->pos)
 572     {
 573     case IP_NORMAL:
 574       fprintf (file, "  incremented before exit test\n");
 575       break;
 576
 577     case IP_BEFORE_USE:
 578       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 579       break;
 580
 581     case IP_AFTER_USE:
 582       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 583       break;
 584
 585     case IP_END:
 586       fprintf (file, "  incremented at end\n");
 587       break;
 588
 589     case IP_ORIGINAL:
 590       fprintf (file, "  original biv\n");
 591       break;
 592     }
 593
 594   dump_iv (file, iv);
 595 }
 596
 597 /* Returns the info for ssa version VER.  */
 598
 599 static inline struct version_info *
 600 ver_info (struct ivopts_data *data, unsigned ver)
 601 {
 602   return data->version_info + ver;
 603 }
 604
 605 /* Returns the info for ssa name NAME.  */
 606
 607 static inline struct version_info *
 608 name_info (struct ivopts_data *data, tree name)
 609 {
 610   return ver_info (data, SSA_NAME_VERSION (name));
 611 }
 612
 613 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 614    emitted in LOOP.  */
 615
 616 static bool
 617 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 618 {
 619   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 620
 621   gcc_assert (bb);
 622
 623   if (sbb == loop->latch)
 624     return true;
 625
 626   if (sbb != bb)
 627     return false;
 628
 629   return stmt == last_stmt (bb);
 630 }
 631
 632 /* Returns true if STMT if after the place where the original induction
 633    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 634    if the positions are identical.  */
 635
 636 static bool
 637 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 638 {
 639   basic_block cand_bb = gimple_bb (cand->incremented_at);
 640   basic_block stmt_bb = gimple_bb (stmt);
 641
 642   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 643     return false;
 644
 645   if (stmt_bb != cand_bb)
 646     return true;
 647
 648   if (true_if_equal
 649       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 650     return true;
 651   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 652 }
 653
 654 /* Returns true if STMT if after the place where the induction variable
 655    CAND is incremented in LOOP.  */
 656
 657 static bool
 658 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 659 {
 660   switch (cand->pos)
 661     {
 662     case IP_END:
 663       return false;
 664
 665     case IP_NORMAL:
 666       return stmt_after_ip_normal_pos (loop, stmt);
 667
 668     case IP_ORIGINAL:
 669     case IP_AFTER_USE:
 670       return stmt_after_inc_pos (cand, stmt, false);
 671
 672     case IP_BEFORE_USE:
 673       return stmt_after_inc_pos (cand, stmt, true);
 674
 675     default:
 676       gcc_unreachable ();
 677     }
 678 }
 679
 680 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 681
 682 static bool
 683 abnormal_ssa_name_p (tree exp)
 684 {
 685   if (!exp)
 686     return false;
 687
 688   if (TREE_CODE (exp) != SSA_NAME)
 689     return false;
 690
 691   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 692 }
 693
 694 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 695    abnormal phi node.  Callback for for_each_index.  */
 696
 697 static bool
 698 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 699                                   void *data ATTRIBUTE_UNUSED)
 700 {
 701   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 702     {
 703       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 704         return false;
 705       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 706         return false;
 707     }
 708
 709   return !abnormal_ssa_name_p (*index);
 710 }
 711
 712 /* Returns true if EXPR contains a ssa name that occurs in an
 713    abnormal phi node.  */
 714
 715 bool
 716 contains_abnormal_ssa_name_p (tree expr)
 717 {
 718   enum tree_code code;
 719   enum tree_code_class codeclass;
 720
 721   if (!expr)
 722     return false;
 723
 724   code = TREE_CODE (expr);
 725   codeclass = TREE_CODE_CLASS (code);
 726
 727   if (code == SSA_NAME)
 728     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 729
 730   if (code == INTEGER_CST
 731       || is_gimple_min_invariant (expr))
 732     return false;
 733
 734   if (code == ADDR_EXPR)
 735     return !for_each_index (&TREE_OPERAND (expr, 0),
 736                             idx_contains_abnormal_ssa_name_p,
 737                             NULL);
 738
 739   if (code == COND_EXPR)
 740     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 741       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 742       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 743
 744   switch (codeclass)
 745     {
 746     case tcc_binary:
 747     case tcc_comparison:
 748       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 749         return true;
 750
 751       /* Fallthru.  */
 752     case tcc_unary:
 753       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 754         return true;
 755
 756       break;
 757
 758     default:
 759       gcc_unreachable ();
 760     }
 761
 762   return false;
 763 }
 764
 765 /*  Returns tree describing number of iterations determined from
 766     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 767
 768 static tree
 769 niter_for_exit (struct ivopts_data *data, edge exit,
 770                 struct tree_niter_desc **desc_p)
 771 {
 772   struct tree_niter_desc* desc = NULL;
 773   tree niter;
 774   void **slot;
 775
 776   if (!data->niters)
 777     {
 778       data->niters = pointer_map_create ();
 779       slot = NULL;
 780     }
 781   else
 782     slot = pointer_map_contains (data->niters, exit);
 783
 784   if (!slot)
 785     {
 786       /* Try to determine number of iterations.  We must know it
 787          unconditionally (i.e., without possibility of # of iterations
 788          being zero).  Also, we cannot safely work with ssa names that
 789          appear in phi nodes on abnormal edges, so that we do not create
 790          overlapping life ranges for them (PR 27283).  */
 791       desc = XNEW (struct tree_niter_desc);
 792       if (number_of_iterations_exit (data->current_loop,
 793                                      exit, desc, true)
 794           && integer_zerop (desc->may_be_zero)
 795           && !contains_abnormal_ssa_name_p (desc->niter))
 796         niter = desc->niter;
 797       else
 798         niter = NULL_TREE;
 799
 800       desc->niter = niter;
 801       slot = pointer_map_insert (data->niters, exit);
 802       *slot = desc;
 803     }
 804   else
 805     niter = ((struct tree_niter_desc *) *slot)->niter;
 806
 807   if (desc_p)
 808     *desc_p = (struct tree_niter_desc *) *slot;
 809   return niter;
 810 }
 811
 812 /* Returns tree describing number of iterations determined from
 813    single dominating exit of DATA->current_loop, or NULL if something
 814    goes wrong.  */
 815
 816 static tree
 817 niter_for_single_dom_exit (struct ivopts_data *data)
 818 {
 819   edge exit = single_dom_exit (data->current_loop);
 820
 821   if (!exit)
 822     return NULL;
 823
 824   return niter_for_exit (data, exit, NULL);
 825 }
 826
 827 /* Hash table equality function for expressions.  */
 828
 829 static int
 830 htab_inv_expr_eq (const void *ent1, const void *ent2)
 831 {
 832   const struct iv_inv_expr_ent *expr1 =
 833       (const struct iv_inv_expr_ent *)ent1;
 834   const struct iv_inv_expr_ent *expr2 =
 835       (const struct iv_inv_expr_ent *)ent2;
 836
 837   return expr1->hash == expr2->hash
 838          && operand_equal_p (expr1->expr, expr2->expr, 0);
 839 }
 840
 841 /* Hash function for loop invariant expressions.  */
 842
 843 static hashval_t
 844 htab_inv_expr_hash (const void *ent)
 845 {
 846   const struct iv_inv_expr_ent *expr =
 847       (const struct iv_inv_expr_ent *)ent;
 848   return expr->hash;
 849 }
 850
 851 /* Initializes data structures used by the iv optimization pass, stored
 852    in DATA.  */
 853
 854 static void
 855 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 856 {
 857   data->version_info_size = 2 * num_ssa_names;
 858   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 859   data->relevant = BITMAP_ALLOC (NULL);
 860   data->important_candidates = BITMAP_ALLOC (NULL);
 861   data->max_inv_id = 0;
 862   data->niters = NULL;
 863   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 864   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 865   data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
 866                                     htab_inv_expr_eq, free);
 867   data->inv_expr_id = 0;
 868   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 869 }
 870
 871 /* Returns a memory object to that EXPR points.  In case we are able to
 872    determine that it does not point to any such object, NULL is returned.  */
 873
 874 static tree
 875 determine_base_object (tree expr)
 876 {
 877   enum tree_code code = TREE_CODE (expr);
 878   tree base, obj;
 879
 880   /* If this is a pointer casted to any type, we need to determine
 881      the base object for the pointer; so handle conversions before
 882      throwing away non-pointer expressions.  */
 883   if (CONVERT_EXPR_P (expr))
 884     return determine_base_object (TREE_OPERAND (expr, 0));
 885
 886   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 887     return NULL_TREE;
 888
 889   switch (code)
 890     {
 891     case INTEGER_CST:
 892       return NULL_TREE;
 893
 894     case ADDR_EXPR:
 895       obj = TREE_OPERAND (expr, 0);
 896       base = get_base_address (obj);
 897
 898       if (!base)
 899         return expr;
 900
 901       if (TREE_CODE (base) == MEM_REF)
 902         return determine_base_object (TREE_OPERAND (base, 0));
 903
 904       return fold_convert (ptr_type_node,
 905                            build_fold_addr_expr (base));
 906
 907     case POINTER_PLUS_EXPR:
 908       return determine_base_object (TREE_OPERAND (expr, 0));
 909
 910     case PLUS_EXPR:
 911     case MINUS_EXPR:
 912       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 913       gcc_unreachable ();
 914
 915     default:
 916       return fold_convert (ptr_type_node, expr);
 917     }
 918 }
 919
 920 /* Allocates an induction variable with given initial value BASE and step STEP
 921    for loop LOOP.  */
 922
 923 static struct iv *
 924 alloc_iv (tree base, tree step)
 925 {
 926   struct iv *iv = XCNEW (struct iv);
 927   gcc_assert (step != NULL_TREE);
 928
 929   iv->base = base;
 930   iv->base_object = determine_base_object (base);
 931   iv->step = step;
 932   iv->biv_p = false;
 933   iv->have_use_for = false;
 934   iv->use_id = 0;
 935   iv->ssa_name = NULL_TREE;
 936
 937   return iv;
 938 }
 939
 940 /* Sets STEP and BASE for induction variable IV.  */
 941
 942 static void
 943 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 944 {
 945   struct version_info *info = name_info (data, iv);
 946
 947   gcc_assert (!info->iv);
 948
 949   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 950   info->iv = alloc_iv (base, step);
 951   info->iv->ssa_name = iv;
 952 }
 953
 954 /* Finds induction variable declaration for VAR.  */
 955
 956 static struct iv *
 957 get_iv (struct ivopts_data *data, tree var)
 958 {
 959   basic_block bb;
 960   tree type = TREE_TYPE (var);
 961
 962   if (!POINTER_TYPE_P (type)
 963       && !INTEGRAL_TYPE_P (type))
 964     return NULL;
 965
 966   if (!name_info (data, var)->iv)
 967     {
 968       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 969
 970       if (!bb
 971           || !flow_bb_inside_loop_p (data->current_loop, bb))
 972         set_iv (data, var, var, build_int_cst (type, 0));
 973     }
 974
 975   return name_info (data, var)->iv;
 976 }
 977
 978 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 979    not define a simple affine biv with nonzero step.  */
 980
 981 static tree
 982 determine_biv_step (gimple phi)
 983 {
 984   struct loop *loop = gimple_bb (phi)->loop_father;
 985   tree name = PHI_RESULT (phi);
 986   affine_iv iv;
 987
 988   if (!is_gimple_reg (name))
 989     return NULL_TREE;
 990
 991   if (!simple_iv (loop, loop, name, &iv, true))
 992     return NULL_TREE;
 993
 994   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 995 }
 996
 997 /* Finds basic ivs.  */
 998
 999 static bool
1000 find_bivs (struct ivopts_data *data)
1001 {
1002   gimple phi;
1003   tree step, type, base;
1004   bool found = false;
1005   struct loop *loop = data->current_loop;
1006   gimple_stmt_iterator psi;
1007
1008   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1009     {
1010       phi = gsi_stmt (psi);
1011
1012       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1013         continue;
1014
1015       step = determine_biv_step (phi);
1016       if (!step)
1017         continue;
1018
1019       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1020       base = expand_simple_operations (base);
1021       if (contains_abnormal_ssa_name_p (base)
1022           || contains_abnormal_ssa_name_p (step))
1023         continue;
1024
1025       type = TREE_TYPE (PHI_RESULT (phi));
1026       base = fold_convert (type, base);
1027       if (step)
1028         {
1029           if (POINTER_TYPE_P (type))
1030             step = fold_convert (sizetype, step);
1031           else
1032             step = fold_convert (type, step);
1033         }
1034
1035       set_iv (data, PHI_RESULT (phi), base, step);
1036       found = true;
1037     }
1038
1039   return found;
1040 }
1041
1042 /* Marks basic ivs.  */
1043
1044 static void
1045 mark_bivs (struct ivopts_data *data)
1046 {
1047   gimple phi;
1048   tree var;
1049   struct iv *iv, *incr_iv;
1050   struct loop *loop = data->current_loop;
1051   basic_block incr_bb;
1052   gimple_stmt_iterator psi;
1053
1054   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1055     {
1056       phi = gsi_stmt (psi);
1057
1058       iv = get_iv (data, PHI_RESULT (phi));
1059       if (!iv)
1060         continue;
1061
1062       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1063       incr_iv = get_iv (data, var);
1064       if (!incr_iv)
1065         continue;
1066
1067       /* If the increment is in the subloop, ignore it.  */
1068       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1069       if (incr_bb->loop_father != data->current_loop
1070           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1071         continue;
1072
1073       iv->biv_p = true;
1074       incr_iv->biv_p = true;
1075     }
1076 }
1077
1078 /* Checks whether STMT defines a linear induction variable and stores its
1079    parameters to IV.  */
1080
1081 static bool
1082 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1083 {
1084   tree lhs;
1085   struct loop *loop = data->current_loop;
1086
1087   iv->base = NULL_TREE;
1088   iv->step = NULL_TREE;
1089
1090   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1091     return false;
1092
1093   lhs = gimple_assign_lhs (stmt);
1094   if (TREE_CODE (lhs) != SSA_NAME)
1095     return false;
1096
1097   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1098     return false;
1099   iv->base = expand_simple_operations (iv->base);
1100
1101   if (contains_abnormal_ssa_name_p (iv->base)
1102       || contains_abnormal_ssa_name_p (iv->step))
1103     return false;
1104
1105   /* If STMT could throw, then do not consider STMT as defining a GIV.
1106      While this will suppress optimizations, we can not safely delete this
1107      GIV and associated statements, even if it appears it is not used.  */
1108   if (stmt_could_throw_p (stmt))
1109     return false;
1110
1111   return true;
1112 }
1113
1114 /* Finds general ivs in statement STMT.  */
1115
1116 static void
1117 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1118 {
1119   affine_iv iv;
1120
1121   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1122     return;
1123
1124   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1125 }
1126
1127 /* Finds general ivs in basic block BB.  */
1128
1129 static void
1130 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1131 {
1132   gimple_stmt_iterator bsi;
1133
1134   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1135     find_givs_in_stmt (data, gsi_stmt (bsi));
1136 }
1137
1138 /* Finds general ivs.  */
1139
1140 static void
1141 find_givs (struct ivopts_data *data)
1142 {
1143   struct loop *loop = data->current_loop;
1144   basic_block *body = get_loop_body_in_dom_order (loop);
1145   unsigned i;
1146
1147   for (i = 0; i < loop->num_nodes; i++)
1148     find_givs_in_bb (data, body[i]);
1149   free (body);
1150 }
1151
1152 /* For each ssa name defined in LOOP determines whether it is an induction
1153    variable and if so, its initial value and step.  */
1154
1155 static bool
1156 find_induction_variables (struct ivopts_data *data)
1157 {
1158   unsigned i;
1159   bitmap_iterator bi;
1160
1161   if (!find_bivs (data))
1162     return false;
1163
1164   find_givs (data);
1165   mark_bivs (data);
1166
1167   if (dump_file && (dump_flags & TDF_DETAILS))
1168     {
1169       tree niter = niter_for_single_dom_exit (data);
1170
1171       if (niter)
1172         {
1173           fprintf (dump_file, "  number of iterations ");
1174           print_generic_expr (dump_file, niter, TDF_SLIM);
1175           fprintf (dump_file, "\n\n");
1176         };
1177
1178       fprintf (dump_file, "Induction variables:\n\n");
1179
1180       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1181         {
1182           if (ver_info (data, i)->iv)
1183             dump_iv (dump_file, ver_info (data, i)->iv);
1184         }
1185     }
1186
1187   return true;
1188 }
1189
1190 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1191
1192 static struct iv_use *
1193 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1194             gimple stmt, enum use_type use_type)
1195 {
1196   struct iv_use *use = XCNEW (struct iv_use);
1197
1198   use->id = n_iv_uses (data);
1199   use->type = use_type;
1200   use->iv = iv;
1201   use->stmt = stmt;
1202   use->op_p = use_p;
1203   use->related_cands = BITMAP_ALLOC (NULL);
1204
1205   /* To avoid showing ssa name in the dumps, if it was not reset by the
1206      caller.  */
1207   iv->ssa_name = NULL_TREE;
1208
1209   if (dump_file && (dump_flags & TDF_DETAILS))
1210     dump_use (dump_file, use);
1211
1212   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1213
1214   return use;
1215 }
1216
1217 /* Checks whether OP is a loop-level invariant and if so, records it.
1218    NONLINEAR_USE is true if the invariant is used in a way we do not
1219    handle specially.  */
1220
1221 static void
1222 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1223 {
1224   basic_block bb;
1225   struct version_info *info;
1226
1227   if (TREE_CODE (op) != SSA_NAME
1228       || !is_gimple_reg (op))
1229     return;
1230
1231   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1232   if (bb
1233       && flow_bb_inside_loop_p (data->current_loop, bb))
1234     return;
1235
1236   info = name_info (data, op);
1237   info->name = op;
1238   info->has_nonlin_use |= nonlinear_use;
1239   if (!info->inv_id)
1240     info->inv_id = ++data->max_inv_id;
1241   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1242 }
1243
1244 /* Checks whether the use OP is interesting and if so, records it.  */
1245
1246 static struct iv_use *
1247 find_interesting_uses_op (struct ivopts_data *data, tree op)
1248 {
1249   struct iv *iv;
1250   struct iv *civ;
1251   gimple stmt;
1252   struct iv_use *use;
1253
1254   if (TREE_CODE (op) != SSA_NAME)
1255     return NULL;
1256
1257   iv = get_iv (data, op);
1258   if (!iv)
1259     return NULL;
1260
1261   if (iv->have_use_for)
1262     {
1263       use = iv_use (data, iv->use_id);
1264
1265       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1266       return use;
1267     }
1268
1269   if (integer_zerop (iv->step))
1270     {
1271       record_invariant (data, op, true);
1272       return NULL;
1273     }
1274   iv->have_use_for = true;
1275
1276   civ = XNEW (struct iv);
1277   *civ = *iv;
1278
1279   stmt = SSA_NAME_DEF_STMT (op);
1280   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1281               || is_gimple_assign (stmt));
1282
1283   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1284   iv->use_id = use->id;
1285
1286   return use;
1287 }
1288
1289 /* Given a condition in statement STMT, checks whether it is a compare
1290    of an induction variable and an invariant.  If this is the case,
1291    CONTROL_VAR is set to location of the iv, BOUND to the location of
1292    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1293    induction variable descriptions, and true is returned.  If this is not
1294    the case, CONTROL_VAR and BOUND are set to the arguments of the
1295    condition and false is returned.  */
1296
1297 static bool
1298 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1299                        tree **control_var, tree **bound,
1300                        struct iv **iv_var, struct iv **iv_bound)
1301 {
1302   /* The objects returned when COND has constant operands.  */
1303   static struct iv const_iv;
1304   static tree zero;
1305   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1306   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1307   bool ret = false;
1308
1309   if (gimple_code (stmt) == GIMPLE_COND)
1310     {
1311       op0 = gimple_cond_lhs_ptr (stmt);
1312       op1 = gimple_cond_rhs_ptr (stmt);
1313     }
1314   else
1315     {
1316       op0 = gimple_assign_rhs1_ptr (stmt);
1317       op1 = gimple_assign_rhs2_ptr (stmt);
1318     }
1319
1320   zero = integer_zero_node;
1321   const_iv.step = integer_zero_node;
1322
1323   if (TREE_CODE (*op0) == SSA_NAME)
1324     iv0 = get_iv (data, *op0);
1325   if (TREE_CODE (*op1) == SSA_NAME)
1326     iv1 = get_iv (data, *op1);
1327
1328   /* Exactly one of the compared values must be an iv, and the other one must
1329      be an invariant.  */
1330   if (!iv0 || !iv1)
1331     goto end;
1332
1333   if (integer_zerop (iv0->step))
1334     {
1335       /* Control variable may be on the other side.  */
1336       tmp_op = op0; op0 = op1; op1 = tmp_op;
1337       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1338     }
1339   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1340
1341 end:
1342   if (control_var)
1343     *control_var = op0;;
1344   if (iv_var)
1345     *iv_var = iv0;;
1346   if (bound)
1347     *bound = op1;
1348   if (iv_bound)
1349     *iv_bound = iv1;
1350
1351   return ret;
1352 }
1353
1354 /* Checks whether the condition in STMT is interesting and if so,
1355    records it.  */
1356
1357 static void
1358 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1359 {
1360   tree *var_p, *bound_p;
1361   struct iv *var_iv, *civ;
1362
1363   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1364     {
1365       find_interesting_uses_op (data, *var_p);
1366       find_interesting_uses_op (data, *bound_p);
1367       return;
1368     }
1369
1370   civ = XNEW (struct iv);
1371   *civ = *var_iv;
1372   record_use (data, NULL, civ, stmt, USE_COMPARE);
1373 }
1374
1375 /* Returns true if expression EXPR is obviously invariant in LOOP,
1376    i.e. if all its operands are defined outside of the LOOP.  LOOP
1377    should not be the function body.  */
1378
1379 bool
1380 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1381 {
1382   basic_block def_bb;
1383   unsigned i, len;
1384
1385   gcc_assert (loop_depth (loop) > 0);
1386
1387   if (is_gimple_min_invariant (expr))
1388     return true;
1389
1390   if (TREE_CODE (expr) == SSA_NAME)
1391     {
1392       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1393       if (def_bb
1394           && flow_bb_inside_loop_p (loop, def_bb))
1395         return false;
1396
1397       return true;
1398     }
1399
1400   if (!EXPR_P (expr))
1401     return false;
1402
1403   len = TREE_OPERAND_LENGTH (expr);
1404   for (i = 0; i < len; i++)
1405     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1406       return false;
1407
1408   return true;
1409 }
1410
1411 /* Returns true if statement STMT is obviously invariant in LOOP,
1412    i.e. if all its operands on the RHS are defined outside of the LOOP.
1413    LOOP should not be the function body.  */
1414
1415 bool
1416 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1417 {
1418   unsigned i;
1419   tree lhs;
1420
1421   gcc_assert (loop_depth (loop) > 0);
1422
1423   lhs = gimple_get_lhs (stmt);
1424   for (i = 0; i < gimple_num_ops (stmt); i++)
1425     {
1426       tree op = gimple_op (stmt, i);
1427       if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1428         return false;
1429     }
1430
1431   return true;
1432 }
1433
1434 /* Cumulates the steps of indices into DATA and replaces their values with the
1435    initial ones.  Returns false when the value of the index cannot be determined.
1436    Callback for for_each_index.  */
1437
1438 struct ifs_ivopts_data
1439 {
1440   struct ivopts_data *ivopts_data;
1441   gimple stmt;
1442   tree step;
1443 };
1444
1445 static bool
1446 idx_find_step (tree base, tree *idx, void *data)
1447 {
1448   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1449   struct iv *iv;
1450   tree step, iv_base, iv_step, lbound, off;
1451   struct loop *loop = dta->ivopts_data->current_loop;
1452
1453   /* If base is a component ref, require that the offset of the reference
1454      be invariant.  */
1455   if (TREE_CODE (base) == COMPONENT_REF)
1456     {
1457       off = component_ref_field_offset (base);
1458       return expr_invariant_in_loop_p (loop, off);
1459     }
1460
1461   /* If base is array, first check whether we will be able to move the
1462      reference out of the loop (in order to take its address in strength
1463      reduction).  In order for this to work we need both lower bound
1464      and step to be loop invariants.  */
1465   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1466     {
1467       /* Moreover, for a range, the size needs to be invariant as well.  */
1468       if (TREE_CODE (base) == ARRAY_RANGE_REF
1469           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1470         return false;
1471
1472       step = array_ref_element_size (base);
1473       lbound = array_ref_low_bound (base);
1474
1475       if (!expr_invariant_in_loop_p (loop, step)
1476           || !expr_invariant_in_loop_p (loop, lbound))
1477         return false;
1478     }
1479
1480   if (TREE_CODE (*idx) != SSA_NAME)
1481     return true;
1482
1483   iv = get_iv (dta->ivopts_data, *idx);
1484   if (!iv)
1485     return false;
1486
1487   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1488           *&x[0], which is not folded and does not trigger the
1489           ARRAY_REF path below.  */
1490   *idx = iv->base;
1491
1492   if (integer_zerop (iv->step))
1493     return true;
1494
1495   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1496     {
1497       step = array_ref_element_size (base);
1498
1499       /* We only handle addresses whose step is an integer constant.  */
1500       if (TREE_CODE (step) != INTEGER_CST)
1501         return false;
1502     }
1503   else
1504     /* The step for pointer arithmetics already is 1 byte.  */
1505     step = size_one_node;
1506
1507   iv_base = iv->base;
1508   iv_step = iv->step;
1509   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1510                             sizetype, &iv_base, &iv_step, dta->stmt,
1511                             false))
1512     {
1513       /* The index might wrap.  */
1514       return false;
1515     }
1516
1517   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1518   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1519
1520   return true;
1521 }
1522
1523 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1524    object is passed to it in DATA.  */
1525
1526 static bool
1527 idx_record_use (tree base, tree *idx,
1528                 void *vdata)
1529 {
1530   struct ivopts_data *data = (struct ivopts_data *) vdata;
1531   find_interesting_uses_op (data, *idx);
1532   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1533     {
1534       find_interesting_uses_op (data, array_ref_element_size (base));
1535       find_interesting_uses_op (data, array_ref_low_bound (base));
1536     }
1537   return true;
1538 }
1539
1540 /* If we can prove that TOP = cst * BOT for some constant cst,
1541    store cst to MUL and return true.  Otherwise return false.
1542    The returned value is always sign-extended, regardless of the
1543    signedness of TOP and BOT.  */
1544
1545 static bool
1546 constant_multiple_of (tree top, tree bot, double_int *mul)
1547 {
1548   tree mby;
1549   enum tree_code code;
1550   double_int res, p0, p1;
1551   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1552
1553   STRIP_NOPS (top);
1554   STRIP_NOPS (bot);
1555
1556   if (operand_equal_p (top, bot, 0))
1557     {
1558       *mul = double_int_one;
1559       return true;
1560     }
1561
1562   code = TREE_CODE (top);
1563   switch (code)
1564     {
1565     case MULT_EXPR:
1566       mby = TREE_OPERAND (top, 1);
1567       if (TREE_CODE (mby) != INTEGER_CST)
1568         return false;
1569
1570       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1571         return false;
1572
1573       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1574                               precision);
1575       return true;
1576
1577     case PLUS_EXPR:
1578     case MINUS_EXPR:
1579       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1580           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1581         return false;
1582
1583       if (code == MINUS_EXPR)
1584         p1 = double_int_neg (p1);
1585       *mul = double_int_sext (double_int_add (p0, p1), precision);
1586       return true;
1587
1588     case INTEGER_CST:
1589       if (TREE_CODE (bot) != INTEGER_CST)
1590         return false;
1591
1592       p0 = double_int_sext (tree_to_double_int (top), precision);
1593       p1 = double_int_sext (tree_to_double_int (bot), precision);
1594       if (double_int_zero_p (p1))
1595         return false;
1596       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1597                               precision);
1598       return double_int_zero_p (res);
1599
1600     default:
1601       return false;
1602     }
1603 }
1604
1605 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1606
1607 static bool
1608 may_be_unaligned_p (tree ref, tree step)
1609 {
1610   tree base;
1611   tree base_type;
1612   HOST_WIDE_INT bitsize;
1613   HOST_WIDE_INT bitpos;
1614   tree toffset;
1615   enum machine_mode mode;
1616   int unsignedp, volatilep;
1617   unsigned base_align;
1618
1619   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1620      thus they are not misaligned.  */
1621   if (TREE_CODE (ref) == TARGET_MEM_REF)
1622     return false;
1623
1624   /* The test below is basically copy of what expr.c:normal_inner_ref
1625      does to check whether the object must be loaded by parts when
1626      STRICT_ALIGNMENT is true.  */
1627   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1628                               &unsignedp, &volatilep, true);
1629   base_type = TREE_TYPE (base);
1630   base_align = TYPE_ALIGN (base_type);
1631
1632   if (mode != BLKmode)
1633     {
1634       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1635
1636       if (base_align < mode_align
1637           || (bitpos % mode_align) != 0
1638           || (bitpos % BITS_PER_UNIT) != 0)
1639         return true;
1640
1641       if (toffset
1642           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1643         return true;
1644
1645       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1646         return true;
1647     }
1648
1649   return false;
1650 }
1651
1652 /* Return true if EXPR may be non-addressable.   */
1653
1654 bool
1655 may_be_nonaddressable_p (tree expr)
1656 {
1657   switch (TREE_CODE (expr))
1658     {
1659     case TARGET_MEM_REF:
1660       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1661          target, thus they are always addressable.  */
1662       return false;
1663
1664     case COMPONENT_REF:
1665       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1666              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1667
1668     case VIEW_CONVERT_EXPR:
1669       /* This kind of view-conversions may wrap non-addressable objects
1670          and make them look addressable.  After some processing the
1671          non-addressability may be uncovered again, causing ADDR_EXPRs
1672          of inappropriate objects to be built.  */
1673       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1674           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1675         return true;
1676
1677       /* ... fall through ... */
1678
1679     case ARRAY_REF:
1680     case ARRAY_RANGE_REF:
1681       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1682
1683     CASE_CONVERT:
1684       return true;
1685
1686     default:
1687       break;
1688     }
1689
1690   return false;
1691 }
1692
1693 /* Finds addresses in *OP_P inside STMT.  */
1694
1695 static void
1696 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1697 {
1698   tree base = *op_p, step = size_zero_node;
1699   struct iv *civ;
1700   struct ifs_ivopts_data ifs_ivopts_data;
1701
1702   /* Do not play with volatile memory references.  A bit too conservative,
1703      perhaps, but safe.  */
1704   if (gimple_has_volatile_ops (stmt))
1705     goto fail;
1706
1707   /* Ignore bitfields for now.  Not really something terribly complicated
1708      to handle.  TODO.  */
1709   if (TREE_CODE (base) == BIT_FIELD_REF)
1710     goto fail;
1711
1712   base = unshare_expr (base);
1713
1714   if (TREE_CODE (base) == TARGET_MEM_REF)
1715     {
1716       tree type = build_pointer_type (TREE_TYPE (base));
1717       tree astep;
1718
1719       if (TMR_BASE (base)
1720           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1721         {
1722           civ = get_iv (data, TMR_BASE (base));
1723           if (!civ)
1724             goto fail;
1725
1726           TMR_BASE (base) = civ->base;
1727           step = civ->step;
1728         }
1729       if (TMR_INDEX2 (base)
1730           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1731         {
1732           civ = get_iv (data, TMR_INDEX2 (base));
1733           if (!civ)
1734             goto fail;
1735
1736           TMR_INDEX2 (base) = civ->base;
1737           step = civ->step;
1738         }
1739       if (TMR_INDEX (base)
1740           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1741         {
1742           civ = get_iv (data, TMR_INDEX (base));
1743           if (!civ)
1744             goto fail;
1745
1746           TMR_INDEX (base) = civ->base;
1747           astep = civ->step;
1748
1749           if (astep)
1750             {
1751               if (TMR_STEP (base))
1752                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1753
1754               step = fold_build2 (PLUS_EXPR, type, step, astep);
1755             }
1756         }
1757
1758       if (integer_zerop (step))
1759         goto fail;
1760       base = tree_mem_ref_addr (type, base);
1761     }
1762   else
1763     {
1764       ifs_ivopts_data.ivopts_data = data;
1765       ifs_ivopts_data.stmt = stmt;
1766       ifs_ivopts_data.step = size_zero_node;
1767       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1768           || integer_zerop (ifs_ivopts_data.step))
1769         goto fail;
1770       step = ifs_ivopts_data.step;
1771
1772       /* Check that the base expression is addressable.  This needs
1773          to be done after substituting bases of IVs into it.  */
1774       if (may_be_nonaddressable_p (base))
1775         goto fail;
1776
1777       /* Moreover, on strict alignment platforms, check that it is
1778          sufficiently aligned.  */
1779       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1780         goto fail;
1781
1782       base = build_fold_addr_expr (base);
1783
1784       /* Substituting bases of IVs into the base expression might
1785          have caused folding opportunities.  */
1786       if (TREE_CODE (base) == ADDR_EXPR)
1787         {
1788           tree *ref = &TREE_OPERAND (base, 0);
1789           while (handled_component_p (*ref))
1790             ref = &TREE_OPERAND (*ref, 0);
1791           if (TREE_CODE (*ref) == MEM_REF)
1792             {
1793               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1794                                       TREE_OPERAND (*ref, 0),
1795                                       TREE_OPERAND (*ref, 1));
1796               if (tem)
1797                 *ref = tem;
1798             }
1799         }
1800     }
1801
1802   civ = alloc_iv (base, step);
1803   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1804   return;
1805
1806 fail:
1807   for_each_index (op_p, idx_record_use, data);
1808 }
1809
1810 /* Finds and records invariants used in STMT.  */
1811
1812 static void
1813 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1814 {
1815   ssa_op_iter iter;
1816   use_operand_p use_p;
1817   tree op;
1818
1819   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1820     {
1821       op = USE_FROM_PTR (use_p);
1822       record_invariant (data, op, false);
1823     }
1824 }
1825
1826 /* Finds interesting uses of induction variables in the statement STMT.  */
1827
1828 static void
1829 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1830 {
1831   struct iv *iv;
1832   tree op, *lhs, *rhs;
1833   ssa_op_iter iter;
1834   use_operand_p use_p;
1835   enum tree_code code;
1836
1837   find_invariants_stmt (data, stmt);
1838
1839   if (gimple_code (stmt) == GIMPLE_COND)
1840     {
1841       find_interesting_uses_cond (data, stmt);
1842       return;
1843     }
1844
1845   if (is_gimple_assign (stmt))
1846     {
1847       lhs = gimple_assign_lhs_ptr (stmt);
1848       rhs = gimple_assign_rhs1_ptr (stmt);
1849
1850       if (TREE_CODE (*lhs) == SSA_NAME)
1851         {
1852           /* If the statement defines an induction variable, the uses are not
1853              interesting by themselves.  */
1854
1855           iv = get_iv (data, *lhs);
1856
1857           if (iv && !integer_zerop (iv->step))
1858             return;
1859         }
1860
1861       code = gimple_assign_rhs_code (stmt);
1862       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1863           && (REFERENCE_CLASS_P (*rhs)
1864               || is_gimple_val (*rhs)))
1865         {
1866           if (REFERENCE_CLASS_P (*rhs))
1867             find_interesting_uses_address (data, stmt, rhs);
1868           else
1869             find_interesting_uses_op (data, *rhs);
1870
1871           if (REFERENCE_CLASS_P (*lhs))
1872             find_interesting_uses_address (data, stmt, lhs);
1873           return;
1874         }
1875       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1876         {
1877           find_interesting_uses_cond (data, stmt);
1878           return;
1879         }
1880
1881       /* TODO -- we should also handle address uses of type
1882
1883          memory = call (whatever);
1884
1885          and
1886
1887          call (memory).  */
1888     }
1889
1890   if (gimple_code (stmt) == GIMPLE_PHI
1891       && gimple_bb (stmt) == data->current_loop->header)
1892     {
1893       iv = get_iv (data, PHI_RESULT (stmt));
1894
1895       if (iv && !integer_zerop (iv->step))
1896         return;
1897     }
1898
1899   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1900     {
1901       op = USE_FROM_PTR (use_p);
1902
1903       if (TREE_CODE (op) != SSA_NAME)
1904         continue;
1905
1906       iv = get_iv (data, op);
1907       if (!iv)
1908         continue;
1909
1910       find_interesting_uses_op (data, op);
1911     }
1912 }
1913
1914 /* Finds interesting uses of induction variables outside of loops
1915    on loop exit edge EXIT.  */
1916
1917 static void
1918 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1919 {
1920   gimple phi;
1921   gimple_stmt_iterator psi;
1922   tree def;
1923
1924   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1925     {
1926       phi = gsi_stmt (psi);
1927       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1928       if (is_gimple_reg (def))
1929         find_interesting_uses_op (data, def);
1930     }
1931 }
1932
1933 /* Finds uses of the induction variables that are interesting.  */
1934
1935 static void
1936 find_interesting_uses (struct ivopts_data *data)
1937 {
1938   basic_block bb;
1939   gimple_stmt_iterator bsi;
1940   basic_block *body = get_loop_body (data->current_loop);
1941   unsigned i;
1942   struct version_info *info;
1943   edge e;
1944
1945   if (dump_file && (dump_flags & TDF_DETAILS))
1946     fprintf (dump_file, "Uses:\n\n");
1947
1948   for (i = 0; i < data->current_loop->num_nodes; i++)
1949     {
1950       edge_iterator ei;
1951       bb = body[i];
1952
1953       FOR_EACH_EDGE (e, ei, bb->succs)
1954         if (e->dest != EXIT_BLOCK_PTR
1955             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1956           find_interesting_uses_outside (data, e);
1957
1958       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1959         find_interesting_uses_stmt (data, gsi_stmt (bsi));
1960       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1961         if (!is_gimple_debug (gsi_stmt (bsi)))
1962           find_interesting_uses_stmt (data, gsi_stmt (bsi));
1963     }
1964
1965   if (dump_file && (dump_flags & TDF_DETAILS))
1966     {
1967       bitmap_iterator bi;
1968
1969       fprintf (dump_file, "\n");
1970
1971       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1972         {
1973           info = ver_info (data, i);
1974           if (info->inv_id)
1975             {
1976               fprintf (dump_file, "  ");
1977               print_generic_expr (dump_file, info->name, TDF_SLIM);
1978               fprintf (dump_file, " is invariant (%d)%s\n",
1979                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1980             }
1981         }
1982
1983       fprintf (dump_file, "\n");
1984     }
1985
1986   free (body);
1987 }
1988
1989 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1990    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1991    we are at the top-level of the processed address.  */
1992
1993 static tree
1994 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1995                 unsigned HOST_WIDE_INT *offset)
1996 {
1997   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1998   enum tree_code code;
1999   tree type, orig_type = TREE_TYPE (expr);
2000   unsigned HOST_WIDE_INT off0, off1, st;
2001   tree orig_expr = expr;
2002
2003   STRIP_NOPS (expr);
2004
2005   type = TREE_TYPE (expr);
2006   code = TREE_CODE (expr);
2007   *offset = 0;
2008
2009   switch (code)
2010     {
2011     case INTEGER_CST:
2012       if (!cst_and_fits_in_hwi (expr)
2013           || integer_zerop (expr))
2014         return orig_expr;
2015
2016       *offset = int_cst_value (expr);
2017       return build_int_cst (orig_type, 0);
2018
2019     case POINTER_PLUS_EXPR:
2020     case PLUS_EXPR:
2021     case MINUS_EXPR:
2022       op0 = TREE_OPERAND (expr, 0);
2023       op1 = TREE_OPERAND (expr, 1);
2024
2025       op0 = strip_offset_1 (op0, false, false, &off0);
2026       op1 = strip_offset_1 (op1, false, false, &off1);
2027
2028       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2029       if (op0 == TREE_OPERAND (expr, 0)
2030           && op1 == TREE_OPERAND (expr, 1))
2031         return orig_expr;
2032
2033       if (integer_zerop (op1))
2034         expr = op0;
2035       else if (integer_zerop (op0))
2036         {
2037           if (code == MINUS_EXPR)
2038             expr = fold_build1 (NEGATE_EXPR, type, op1);
2039           else
2040             expr = op1;
2041         }
2042       else
2043         expr = fold_build2 (code, type, op0, op1);
2044
2045       return fold_convert (orig_type, expr);
2046
2047     case MULT_EXPR:
2048       op1 = TREE_OPERAND (expr, 1);
2049       if (!cst_and_fits_in_hwi (op1))
2050         return orig_expr;
2051
2052       op0 = TREE_OPERAND (expr, 0);
2053       op0 = strip_offset_1 (op0, false, false, &off0);
2054       if (op0 == TREE_OPERAND (expr, 0))
2055         return orig_expr;
2056
2057       *offset = off0 * int_cst_value (op1);
2058       if (integer_zerop (op0))
2059         expr = op0;
2060       else
2061         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2062
2063       return fold_convert (orig_type, expr);
2064
2065     case ARRAY_REF:
2066     case ARRAY_RANGE_REF:
2067       if (!inside_addr)
2068         return orig_expr;
2069
2070       step = array_ref_element_size (expr);
2071       if (!cst_and_fits_in_hwi (step))
2072         break;
2073
2074       st = int_cst_value (step);
2075       op1 = TREE_OPERAND (expr, 1);
2076       op1 = strip_offset_1 (op1, false, false, &off1);
2077       *offset = off1 * st;
2078
2079       if (top_compref
2080           && integer_zerop (op1))
2081         {
2082           /* Strip the component reference completely.  */
2083           op0 = TREE_OPERAND (expr, 0);
2084           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2085           *offset += off0;
2086           return op0;
2087         }
2088       break;
2089
2090     case COMPONENT_REF:
2091       if (!inside_addr)
2092         return orig_expr;
2093
2094       tmp = component_ref_field_offset (expr);
2095       if (top_compref
2096           && cst_and_fits_in_hwi (tmp))
2097         {
2098           /* Strip the component reference completely.  */
2099           op0 = TREE_OPERAND (expr, 0);
2100           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2101           *offset = off0 + int_cst_value (tmp);
2102           return op0;
2103         }
2104       break;
2105
2106     case ADDR_EXPR:
2107       op0 = TREE_OPERAND (expr, 0);
2108       op0 = strip_offset_1 (op0, true, true, &off0);
2109       *offset += off0;
2110
2111       if (op0 == TREE_OPERAND (expr, 0))
2112         return orig_expr;
2113
2114       expr = build_fold_addr_expr (op0);
2115       return fold_convert (orig_type, expr);
2116
2117     case MEM_REF:
2118       /* ???  Offset operand?  */
2119       inside_addr = false;
2120       break;
2121
2122     default:
2123       return orig_expr;
2124     }
2125
2126   /* Default handling of expressions for that we want to recurse into
2127      the first operand.  */
2128   op0 = TREE_OPERAND (expr, 0);
2129   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2130   *offset += off0;
2131
2132   if (op0 == TREE_OPERAND (expr, 0)
2133       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2134     return orig_expr;
2135
2136   expr = copy_node (expr);
2137   TREE_OPERAND (expr, 0) = op0;
2138   if (op1)
2139     TREE_OPERAND (expr, 1) = op1;
2140
2141   /* Inside address, we might strip the top level component references,
2142      thus changing type of the expression.  Handling of ADDR_EXPR
2143      will fix that.  */
2144   expr = fold_convert (orig_type, expr);
2145
2146   return expr;
2147 }
2148
2149 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2150
2151 static tree
2152 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2153 {
2154   return strip_offset_1 (expr, false, false, offset);
2155 }
2156
2157 /* Returns variant of TYPE that can be used as base for different uses.
2158    We return unsigned type with the same precision, which avoids problems
2159    with overflows.  */
2160
2161 static tree
2162 generic_type_for (tree type)
2163 {
2164   if (POINTER_TYPE_P (type))
2165     return unsigned_type_for (type);
2166
2167   if (TYPE_UNSIGNED (type))
2168     return type;
2169
2170   return unsigned_type_for (type);
2171 }
2172
2173 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2174    the bitmap to that we should store it.  */
2175
2176 static struct ivopts_data *fd_ivopts_data;
2177 static tree
2178 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2179 {
2180   bitmap *depends_on = (bitmap *) data;
2181   struct version_info *info;
2182
2183   if (TREE_CODE (*expr_p) != SSA_NAME)
2184     return NULL_TREE;
2185   info = name_info (fd_ivopts_data, *expr_p);
2186
2187   if (!info->inv_id || info->has_nonlin_use)
2188     return NULL_TREE;
2189
2190   if (!*depends_on)
2191     *depends_on = BITMAP_ALLOC (NULL);
2192   bitmap_set_bit (*depends_on, info->inv_id);
2193
2194   return NULL_TREE;
2195 }
2196
2197 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2198    position to POS.  If USE is not NULL, the candidate is set as related to
2199    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2200    replacement of the final value of the iv by a direct computation.  */
2201
2202 static struct iv_cand *
2203 add_candidate_1 (struct ivopts_data *data,
2204                  tree base, tree step, bool important, enum iv_position pos,
2205                  struct iv_use *use, gimple incremented_at)
2206 {
2207   unsigned i;
2208   struct iv_cand *cand = NULL;
2209   tree type, orig_type;
2210
2211   if (base)
2212     {
2213       orig_type = TREE_TYPE (base);
2214       type = generic_type_for (orig_type);
2215       if (type != orig_type)
2216         {
2217           base = fold_convert (type, base);
2218           step = fold_convert (type, step);
2219         }
2220     }
2221
2222   for (i = 0; i < n_iv_cands (data); i++)
2223     {
2224       cand = iv_cand (data, i);
2225
2226       if (cand->pos != pos)
2227         continue;
2228
2229       if (cand->incremented_at != incremented_at
2230           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2231               && cand->ainc_use != use))
2232         continue;
2233
2234       if (!cand->iv)
2235         {
2236           if (!base && !step)
2237             break;
2238
2239           continue;
2240         }
2241
2242       if (!base && !step)
2243         continue;
2244
2245       if (operand_equal_p (base, cand->iv->base, 0)
2246           && operand_equal_p (step, cand->iv->step, 0)
2247           && (TYPE_PRECISION (TREE_TYPE (base))
2248               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2249         break;
2250     }
2251
2252   if (i == n_iv_cands (data))
2253     {
2254       cand = XCNEW (struct iv_cand);
2255       cand->id = i;
2256
2257       if (!base && !step)
2258         cand->iv = NULL;
2259       else
2260         cand->iv = alloc_iv (base, step);
2261
2262       cand->pos = pos;
2263       if (pos != IP_ORIGINAL && cand->iv)
2264         {
2265           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2266           cand->var_after = cand->var_before;
2267         }
2268       cand->important = important;
2269       cand->incremented_at = incremented_at;
2270       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2271
2272       if (step
2273           && TREE_CODE (step) != INTEGER_CST)
2274         {
2275           fd_ivopts_data = data;
2276           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2277         }
2278
2279       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2280         cand->ainc_use = use;
2281       else
2282         cand->ainc_use = NULL;
2283
2284       if (dump_file && (dump_flags & TDF_DETAILS))
2285         dump_cand (dump_file, cand);
2286     }
2287
2288   if (important && !cand->important)
2289     {
2290       cand->important = true;
2291       if (dump_file && (dump_flags & TDF_DETAILS))
2292         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2293     }
2294
2295   if (use)
2296     {
2297       bitmap_set_bit (use->related_cands, i);
2298       if (dump_file && (dump_flags & TDF_DETAILS))
2299         fprintf (dump_file, "Candidate %d is related to use %d\n",
2300                  cand->id, use->id);
2301     }
2302
2303   return cand;
2304 }
2305
2306 /* Returns true if incrementing the induction variable at the end of the LOOP
2307    is allowed.
2308
2309    The purpose is to avoid splitting latch edge with a biv increment, thus
2310    creating a jump, possibly confusing other optimization passes and leaving
2311    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2312    is not available (so we do not have a better alternative), or if the latch
2313    edge is already nonempty.  */
2314
2315 static bool
2316 allow_ip_end_pos_p (struct loop *loop)
2317 {
2318   if (!ip_normal_pos (loop))
2319     return true;
2320
2321   if (!empty_block_p (ip_end_pos (loop)))
2322     return true;
2323
2324   return false;
2325 }
2326
2327 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2328    Important field is set to IMPORTANT.  */
2329
2330 static void
2331 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2332                         bool important, struct iv_use *use)
2333 {
2334   basic_block use_bb = gimple_bb (use->stmt);
2335   enum machine_mode mem_mode;
2336   unsigned HOST_WIDE_INT cstepi;
2337
2338   /* If we insert the increment in any position other than the standard
2339      ones, we must ensure that it is incremented once per iteration.
2340      It must not be in an inner nested loop, or one side of an if
2341      statement.  */
2342   if (use_bb->loop_father != data->current_loop
2343       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2344       || stmt_could_throw_p (use->stmt)
2345       || !cst_and_fits_in_hwi (step))
2346     return;
2347
2348   cstepi = int_cst_value (step);
2349
2350   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2351   if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2352       || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2353     {
2354       enum tree_code code = MINUS_EXPR;
2355       tree new_base;
2356       tree new_step = step;
2357
2358       if (POINTER_TYPE_P (TREE_TYPE (base)))
2359         {
2360           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2361           code = POINTER_PLUS_EXPR;
2362         }
2363       else
2364         new_step = fold_convert (TREE_TYPE (base), new_step);
2365       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2366       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2367                        use->stmt);
2368     }
2369   if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2370       || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2371     {
2372       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2373                        use->stmt);
2374     }
2375 }
2376
2377 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2378    position to POS.  If USE is not NULL, the candidate is set as related to
2379    it.  The candidate computation is scheduled on all available positions.  */
2380
2381 static void
2382 add_candidate (struct ivopts_data *data,
2383                tree base, tree step, bool important, struct iv_use *use)
2384 {
2385   if (ip_normal_pos (data->current_loop))
2386     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2387   if (ip_end_pos (data->current_loop)
2388       && allow_ip_end_pos_p (data->current_loop))
2389     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2390
2391   if (use != NULL && use->type == USE_ADDRESS)
2392     add_autoinc_candidates (data, base, step, important, use);
2393 }
2394
2395 /* Add a standard "0 + 1 * iteration" iv candidate for a
2396    type with SIZE bits.  */
2397
2398 static void
2399 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2400                                      unsigned int size)
2401 {
2402   tree type = lang_hooks.types.type_for_size (size, true);
2403   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2404                  true, NULL);
2405 }
2406
2407 /* Adds standard iv candidates.  */
2408
2409 static void
2410 add_standard_iv_candidates (struct ivopts_data *data)
2411 {
2412   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2413
2414   /* The same for a double-integer type if it is still fast enough.  */
2415   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2416     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2417 }
2418
2419
2420 /* Adds candidates bases on the old induction variable IV.  */
2421
2422 static void
2423 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2424 {
2425   gimple phi;
2426   tree def;
2427   struct iv_cand *cand;
2428
2429   add_candidate (data, iv->base, iv->step, true, NULL);
2430
2431   /* The same, but with initial value zero.  */
2432   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2433     add_candidate (data, size_int (0), iv->step, true, NULL);
2434   else
2435     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2436                    iv->step, true, NULL);
2437
2438   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2439   if (gimple_code (phi) == GIMPLE_PHI)
2440     {
2441       /* Additionally record the possibility of leaving the original iv
2442          untouched.  */
2443       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2444       cand = add_candidate_1 (data,
2445                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2446                               SSA_NAME_DEF_STMT (def));
2447       cand->var_before = iv->ssa_name;
2448       cand->var_after = def;
2449     }
2450 }
2451
2452 /* Adds candidates based on the old induction variables.  */
2453
2454 static void
2455 add_old_ivs_candidates (struct ivopts_data *data)
2456 {
2457   unsigned i;
2458   struct iv *iv;
2459   bitmap_iterator bi;
2460
2461   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2462     {
2463       iv = ver_info (data, i)->iv;
2464       if (iv && iv->biv_p && !integer_zerop (iv->step))
2465         add_old_iv_candidates (data, iv);
2466     }
2467 }
2468
2469 /* Adds candidates based on the value of the induction variable IV and USE.  */
2470
2471 static void
2472 add_iv_value_candidates (struct ivopts_data *data,
2473                          struct iv *iv, struct iv_use *use)
2474 {
2475   unsigned HOST_WIDE_INT offset;
2476   tree base;
2477   tree basetype;
2478
2479   add_candidate (data, iv->base, iv->step, false, use);
2480
2481   /* The same, but with initial value zero.  Make such variable important,
2482      since it is generic enough so that possibly many uses may be based
2483      on it.  */
2484   basetype = TREE_TYPE (iv->base);
2485   if (POINTER_TYPE_P (basetype))
2486     basetype = sizetype;
2487   add_candidate (data, build_int_cst (basetype, 0),
2488                  iv->step, true, use);
2489
2490   /* Third, try removing the constant offset.  Make sure to even
2491      add a candidate for &a[0] vs. (T *)&a.  */
2492   base = strip_offset (iv->base, &offset);
2493   if (offset
2494       || base != iv->base)
2495     add_candidate (data, base, iv->step, false, use);
2496 }
2497
2498 /* Adds candidates based on the uses.  */
2499
2500 static void
2501 add_derived_ivs_candidates (struct ivopts_data *data)
2502 {
2503   unsigned i;
2504
2505   for (i = 0; i < n_iv_uses (data); i++)
2506     {
2507       struct iv_use *use = iv_use (data, i);
2508
2509       if (!use)
2510         continue;
2511
2512       switch (use->type)
2513         {
2514         case USE_NONLINEAR_EXPR:
2515         case USE_COMPARE:
2516         case USE_ADDRESS:
2517           /* Just add the ivs based on the value of the iv used here.  */
2518           add_iv_value_candidates (data, use->iv, use);
2519           break;
2520
2521         default:
2522           gcc_unreachable ();
2523         }
2524     }
2525 }
2526
2527 /* Record important candidates and add them to related_cands bitmaps
2528    if needed.  */
2529
2530 static void
2531 record_important_candidates (struct ivopts_data *data)
2532 {
2533   unsigned i;
2534   struct iv_use *use;
2535
2536   for (i = 0; i < n_iv_cands (data); i++)
2537     {
2538       struct iv_cand *cand = iv_cand (data, i);
2539
2540       if (cand->important)
2541         bitmap_set_bit (data->important_candidates, i);
2542     }
2543
2544   data->consider_all_candidates = (n_iv_cands (data)
2545                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2546
2547   if (data->consider_all_candidates)
2548     {
2549       /* We will not need "related_cands" bitmaps in this case,
2550          so release them to decrease peak memory consumption.  */
2551       for (i = 0; i < n_iv_uses (data); i++)
2552         {
2553           use = iv_use (data, i);
2554           BITMAP_FREE (use->related_cands);
2555         }
2556     }
2557   else
2558     {
2559       /* Add important candidates to the related_cands bitmaps.  */
2560       for (i = 0; i < n_iv_uses (data); i++)
2561         bitmap_ior_into (iv_use (data, i)->related_cands,
2562                          data->important_candidates);
2563     }
2564 }
2565
2566 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2567    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2568    we allocate a simple list to every use.  */
2569
2570 static void
2571 alloc_use_cost_map (struct ivopts_data *data)
2572 {
2573   unsigned i, size, s, j;
2574
2575   for (i = 0; i < n_iv_uses (data); i++)
2576     {
2577       struct iv_use *use = iv_use (data, i);
2578       bitmap_iterator bi;
2579
2580       if (data->consider_all_candidates)
2581         size = n_iv_cands (data);
2582       else
2583         {
2584           s = 0;
2585           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2586             {
2587               s++;
2588             }
2589
2590           /* Round up to the power of two, so that moduling by it is fast.  */
2591           for (size = 1; size < s; size <<= 1)
2592             continue;
2593         }
2594
2595       use->n_map_members = size;
2596       use->cost_map = XCNEWVEC (struct cost_pair, size);
2597     }
2598 }
2599
2600 /* Returns description of computation cost of expression whose runtime
2601    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2602
2603 static comp_cost
2604 new_cost (unsigned runtime, unsigned complexity)
2605 {
2606   comp_cost cost;
2607
2608   cost.cost = runtime;
2609   cost.complexity = complexity;
2610
2611   return cost;
2612 }
2613
2614 /* Adds costs COST1 and COST2.  */
2615
2616 static comp_cost
2617 add_costs (comp_cost cost1, comp_cost cost2)
2618 {
2619   cost1.cost += cost2.cost;
2620   cost1.complexity += cost2.complexity;
2621
2622   return cost1;
2623 }
2624 /* Subtracts costs COST1 and COST2.  */
2625
2626 static comp_cost
2627 sub_costs (comp_cost cost1, comp_cost cost2)
2628 {
2629   cost1.cost -= cost2.cost;
2630   cost1.complexity -= cost2.complexity;
2631
2632   return cost1;
2633 }
2634
2635 /* Returns a negative number if COST1 < COST2, a positive number if
2636    COST1 > COST2, and 0 if COST1 = COST2.  */
2637
2638 static int
2639 compare_costs (comp_cost cost1, comp_cost cost2)
2640 {
2641   if (cost1.cost == cost2.cost)
2642     return cost1.complexity - cost2.complexity;
2643
2644   return cost1.cost - cost2.cost;
2645 }
2646
2647 /* Returns true if COST is infinite.  */
2648
2649 static bool
2650 infinite_cost_p (comp_cost cost)
2651 {
2652   return cost.cost == INFTY;
2653 }
2654
2655 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2656    on invariants DEPENDS_ON and that the value used in expressing it
2657    is VALUE.  */
2658
2659 static void
2660 set_use_iv_cost (struct ivopts_data *data,
2661                  struct iv_use *use, struct iv_cand *cand,
2662                  comp_cost cost, bitmap depends_on, tree value,
2663                  int inv_expr_id)
2664 {
2665   unsigned i, s;
2666
2667   if (infinite_cost_p (cost))
2668     {
2669       BITMAP_FREE (depends_on);
2670       return;
2671     }
2672
2673   if (data->consider_all_candidates)
2674     {
2675       use->cost_map[cand->id].cand = cand;
2676       use->cost_map[cand->id].cost = cost;
2677       use->cost_map[cand->id].depends_on = depends_on;
2678       use->cost_map[cand->id].value = value;
2679       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2680       return;
2681     }
2682
2683   /* n_map_members is a power of two, so this computes modulo.  */
2684   s = cand->id & (use->n_map_members - 1);
2685   for (i = s; i < use->n_map_members; i++)
2686     if (!use->cost_map[i].cand)
2687       goto found;
2688   for (i = 0; i < s; i++)
2689     if (!use->cost_map[i].cand)
2690       goto found;
2691
2692   gcc_unreachable ();
2693
2694 found:
2695   use->cost_map[i].cand = cand;
2696   use->cost_map[i].cost = cost;
2697   use->cost_map[i].depends_on = depends_on;
2698   use->cost_map[i].value = value;
2699   use->cost_map[i].inv_expr_id = inv_expr_id;
2700 }
2701
2702 /* Gets cost of (USE, CANDIDATE) pair.  */
2703
2704 static struct cost_pair *
2705 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2706                  struct iv_cand *cand)
2707 {
2708   unsigned i, s;
2709   struct cost_pair *ret;
2710
2711   if (!cand)
2712     return NULL;
2713
2714   if (data->consider_all_candidates)
2715     {
2716       ret = use->cost_map + cand->id;
2717       if (!ret->cand)
2718         return NULL;
2719
2720       return ret;
2721     }
2722
2723   /* n_map_members is a power of two, so this computes modulo.  */
2724   s = cand->id & (use->n_map_members - 1);
2725   for (i = s; i < use->n_map_members; i++)
2726     if (use->cost_map[i].cand == cand)
2727       return use->cost_map + i;
2728
2729   for (i = 0; i < s; i++)
2730     if (use->cost_map[i].cand == cand)
2731       return use->cost_map + i;
2732
2733   return NULL;
2734 }
2735
2736 /* Returns estimate on cost of computing SEQ.  */
2737
2738 static unsigned
2739 seq_cost (rtx seq, bool speed)
2740 {
2741   unsigned cost = 0;
2742   rtx set;
2743
2744   for (; seq; seq = NEXT_INSN (seq))
2745     {
2746       set = single_set (seq);
2747       if (set)
2748         cost += rtx_cost (set, SET,speed);
2749       else
2750         cost++;
2751     }
2752
2753   return cost;
2754 }
2755
2756 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2757 static rtx
2758 produce_memory_decl_rtl (tree obj, int *regno)
2759 {
2760   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2761   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2762   rtx x;
2763
2764   gcc_assert (obj);
2765   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2766     {
2767       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2768       x = gen_rtx_SYMBOL_REF (address_mode, name);
2769       SET_SYMBOL_REF_DECL (x, obj);
2770       x = gen_rtx_MEM (DECL_MODE (obj), x);
2771       set_mem_addr_space (x, as);
2772       targetm.encode_section_info (obj, x, true);
2773     }
2774   else
2775     {
2776       x = gen_raw_REG (address_mode, (*regno)++);
2777       x = gen_rtx_MEM (DECL_MODE (obj), x);
2778       set_mem_addr_space (x, as);
2779     }
2780
2781   return x;
2782 }
2783
2784 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2785    walk_tree.  DATA contains the actual fake register number.  */
2786
2787 static tree
2788 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2789 {
2790   tree obj = NULL_TREE;
2791   rtx x = NULL_RTX;
2792   int *regno = (int *) data;
2793
2794   switch (TREE_CODE (*expr_p))
2795     {
2796     case ADDR_EXPR:
2797       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2798            handled_component_p (*expr_p);
2799            expr_p = &TREE_OPERAND (*expr_p, 0))
2800         continue;
2801       obj = *expr_p;
2802       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2803         x = produce_memory_decl_rtl (obj, regno);
2804       break;
2805
2806     case SSA_NAME:
2807       *ws = 0;
2808       obj = SSA_NAME_VAR (*expr_p);
2809       if (!DECL_RTL_SET_P (obj))
2810         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2811       break;
2812
2813     case VAR_DECL:
2814     case PARM_DECL:
2815     case RESULT_DECL:
2816       *ws = 0;
2817       obj = *expr_p;
2818
2819       if (DECL_RTL_SET_P (obj))
2820         break;
2821
2822       if (DECL_MODE (obj) == BLKmode)
2823         x = produce_memory_decl_rtl (obj, regno);
2824       else
2825         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2826
2827       break;
2828
2829     default:
2830       break;
2831     }
2832
2833   if (x)
2834     {
2835       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2836       SET_DECL_RTL (obj, x);
2837     }
2838
2839   return NULL_TREE;
2840 }
2841
2842 /* Determines cost of the computation of EXPR.  */
2843
2844 static unsigned
2845 computation_cost (tree expr, bool speed)
2846 {
2847   rtx seq, rslt;
2848   tree type = TREE_TYPE (expr);
2849   unsigned cost;
2850   /* Avoid using hard regs in ways which may be unsupported.  */
2851   int regno = LAST_VIRTUAL_REGISTER + 1;
2852   struct cgraph_node *node = cgraph_node (current_function_decl);
2853   enum node_frequency real_frequency = node->frequency;
2854
2855   node->frequency = NODE_FREQUENCY_NORMAL;
2856   crtl->maybe_hot_insn_p = speed;
2857   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2858   start_sequence ();
2859   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2860   seq = get_insns ();
2861   end_sequence ();
2862   default_rtl_profile ();
2863   node->frequency = real_frequency;
2864
2865   cost = seq_cost (seq, speed);
2866   if (MEM_P (rslt))
2867     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2868                           TYPE_ADDR_SPACE (type), speed);
2869
2870   return cost;
2871 }
2872
2873 /* Returns variable containing the value of candidate CAND at statement AT.  */
2874
2875 static tree
2876 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2877 {
2878   if (stmt_after_increment (loop, cand, stmt))
2879     return cand->var_after;
2880   else
2881     return cand->var_before;
2882 }
2883
2884 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2885    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2886
2887 int
2888 tree_int_cst_sign_bit (const_tree t)
2889 {
2890   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2891   unsigned HOST_WIDE_INT w;
2892
2893   if (bitno < HOST_BITS_PER_WIDE_INT)
2894     w = TREE_INT_CST_LOW (t);
2895   else
2896     {
2897       w = TREE_INT_CST_HIGH (t);
2898       bitno -= HOST_BITS_PER_WIDE_INT;
2899     }
2900
2901   return (w >> bitno) & 1;
2902 }
2903
2904 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2905    same precision that is at least as wide as the precision of TYPE, stores
2906    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2907    type of A and B.  */
2908
2909 static tree
2910 determine_common_wider_type (tree *a, tree *b)
2911 {
2912   tree wider_type = NULL;
2913   tree suba, subb;
2914   tree atype = TREE_TYPE (*a);
2915
2916   if (CONVERT_EXPR_P (*a))
2917     {
2918       suba = TREE_OPERAND (*a, 0);
2919       wider_type = TREE_TYPE (suba);
2920       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2921         return atype;
2922     }
2923   else
2924     return atype;
2925
2926   if (CONVERT_EXPR_P (*b))
2927     {
2928       subb = TREE_OPERAND (*b, 0);
2929       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2930         return atype;
2931     }
2932   else
2933     return atype;
2934
2935   *a = suba;
2936   *b = subb;
2937   return wider_type;
2938 }
2939
2940 /* Determines the expression by that USE is expressed from induction variable
2941    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2942    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2943
2944 static bool
2945 get_computation_aff (struct loop *loop,
2946                      struct iv_use *use, struct iv_cand *cand, gimple at,
2947                      struct affine_tree_combination *aff)
2948 {
2949   tree ubase = use->iv->base;
2950   tree ustep = use->iv->step;
2951   tree cbase = cand->iv->base;
2952   tree cstep = cand->iv->step, cstep_common;
2953   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2954   tree common_type, var;
2955   tree uutype;
2956   aff_tree cbase_aff, var_aff;
2957   double_int rat;
2958
2959   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2960     {
2961       /* We do not have a precision to express the values of use.  */
2962       return false;
2963     }
2964
2965   var = var_at_stmt (loop, cand, at);
2966   uutype = unsigned_type_for (utype);
2967
2968   /* If the conversion is not noop, perform it.  */
2969   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2970     {
2971       cstep = fold_convert (uutype, cstep);
2972       cbase = fold_convert (uutype, cbase);
2973       var = fold_convert (uutype, var);
2974     }
2975
2976   if (!constant_multiple_of (ustep, cstep, &rat))
2977     return false;
2978
2979   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2980      type, we achieve better folding by computing their difference in this
2981      wider type, and cast the result to UUTYPE.  We do not need to worry about
2982      overflows, as all the arithmetics will in the end be performed in UUTYPE
2983      anyway.  */
2984   common_type = determine_common_wider_type (&ubase, &cbase);
2985
2986   /* use = ubase - ratio * cbase + ratio * var.  */
2987   tree_to_aff_combination (ubase, common_type, aff);
2988   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2989   tree_to_aff_combination (var, uutype, &var_aff);
2990
2991   /* We need to shift the value if we are after the increment.  */
2992   if (stmt_after_increment (loop, cand, at))
2993     {
2994       aff_tree cstep_aff;
2995
2996       if (common_type != uutype)
2997         cstep_common = fold_convert (common_type, cstep);
2998       else
2999         cstep_common = cstep;
3000
3001       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3002       aff_combination_add (&cbase_aff, &cstep_aff);
3003     }
3004
3005   aff_combination_scale (&cbase_aff, double_int_neg (rat));
3006   aff_combination_add (aff, &cbase_aff);
3007   if (common_type != uutype)
3008     aff_combination_convert (aff, uutype);
3009
3010   aff_combination_scale (&var_aff, rat);
3011   aff_combination_add (aff, &var_aff);
3012
3013   return true;
3014 }
3015
3016 /* Determines the expression by that USE is expressed from induction variable
3017    CAND at statement AT in LOOP.  The computation is unshared.  */
3018
3019 static tree
3020 get_computation_at (struct loop *loop,
3021                     struct iv_use *use, struct iv_cand *cand, gimple at)
3022 {
3023   aff_tree aff;
3024   tree type = TREE_TYPE (use->iv->base);
3025
3026   if (!get_computation_aff (loop, use, cand, at, &aff))
3027     return NULL_TREE;
3028   unshare_aff_combination (&aff);
3029   return fold_convert (type, aff_combination_to_tree (&aff));
3030 }
3031
3032 /* Determines the expression by that USE is expressed from induction variable
3033    CAND in LOOP.  The computation is unshared.  */
3034
3035 static tree
3036 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3037 {
3038   return get_computation_at (loop, use, cand, use->stmt);
3039 }
3040
3041 /* Adjust the cost COST for being in loop setup rather than loop body.
3042    If we're optimizing for space, the loop setup overhead is constant;
3043    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3044 static unsigned
3045 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3046 {
3047   if (cost == INFTY)
3048     return cost;
3049   else if (optimize_loop_for_speed_p (data->current_loop))
3050     return cost / avg_loop_niter (data->current_loop);
3051   else
3052     return cost;
3053 }
3054
3055 /* Returns cost of addition in MODE.  */
3056
3057 static unsigned
3058 add_cost (enum machine_mode mode, bool speed)
3059 {
3060   static unsigned costs[NUM_MACHINE_MODES];
3061   rtx seq;
3062   unsigned cost;
3063
3064   if (costs[mode])
3065     return costs[mode];
3066
3067   start_sequence ();
3068   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3069                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3070                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3071                  NULL_RTX);
3072   seq = get_insns ();
3073   end_sequence ();
3074
3075   cost = seq_cost (seq, speed);
3076   if (!cost)
3077     cost = 1;
3078
3079   costs[mode] = cost;
3080
3081   if (dump_file && (dump_flags & TDF_DETAILS))
3082     fprintf (dump_file, "Addition in %s costs %d\n",
3083              GET_MODE_NAME (mode), cost);
3084   return cost;
3085 }
3086
3087 /* Entry in a hashtable of already known costs for multiplication.  */
3088 struct mbc_entry
3089 {
3090   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3091   enum machine_mode mode;       /* In mode.  */
3092   unsigned cost;                /* The cost.  */
3093 };
3094
3095 /* Counts hash value for the ENTRY.  */
3096
3097 static hashval_t
3098 mbc_entry_hash (const void *entry)
3099 {
3100   const struct mbc_entry *e = (const struct mbc_entry *) entry;
3101
3102   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3103 }
3104
3105 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3106
3107 static int
3108 mbc_entry_eq (const void *entry1, const void *entry2)
3109 {
3110   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3111   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3112
3113   return (e1->mode == e2->mode
3114           && e1->cst == e2->cst);
3115 }
3116
3117 /* Returns cost of multiplication by constant CST in MODE.  */
3118
3119 unsigned
3120 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3121 {
3122   static htab_t costs;
3123   struct mbc_entry **cached, act;
3124   rtx seq;
3125   unsigned cost;
3126
3127   if (!costs)
3128     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3129
3130   act.mode = mode;
3131   act.cst = cst;
3132   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3133   if (*cached)
3134     return (*cached)->cost;
3135
3136   *cached = XNEW (struct mbc_entry);
3137   (*cached)->mode = mode;
3138   (*cached)->cst = cst;
3139
3140   start_sequence ();
3141   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3142                gen_int_mode (cst, mode), NULL_RTX, 0);
3143   seq = get_insns ();
3144   end_sequence ();
3145
3146   cost = seq_cost (seq, speed);
3147
3148   if (dump_file && (dump_flags & TDF_DETAILS))
3149     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3150              (int) cst, GET_MODE_NAME (mode), cost);
3151
3152   (*cached)->cost = cost;
3153
3154   return cost;
3155 }
3156
3157 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3158    validity for a memory reference accessing memory of mode MODE in
3159    address space AS.  */
3160
3161 DEF_VEC_P (sbitmap);
3162 DEF_VEC_ALLOC_P (sbitmap, heap);
3163
3164 bool
3165 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3166                                  addr_space_t as)
3167 {
3168 #define MAX_RATIO 128
3169   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3170   static VEC (sbitmap, heap) *valid_mult_list;
3171   sbitmap valid_mult;
3172
3173   if (data_index >= VEC_length (sbitmap, valid_mult_list))
3174     VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3175
3176   valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3177   if (!valid_mult)
3178     {
3179       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3180       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3181       rtx addr;
3182       HOST_WIDE_INT i;
3183
3184       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3185       sbitmap_zero (valid_mult);
3186       addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3187       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3188         {
3189           XEXP (addr, 1) = gen_int_mode (i, address_mode);
3190           if (memory_address_addr_space_p (mode, addr, as))
3191             SET_BIT (valid_mult, i + MAX_RATIO);
3192         }
3193
3194       if (dump_file && (dump_flags & TDF_DETAILS))
3195         {
3196           fprintf (dump_file, "  allowed multipliers:");
3197           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3198             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3199               fprintf (dump_file, " %d", (int) i);
3200           fprintf (dump_file, "\n");
3201           fprintf (dump_file, "\n");
3202         }
3203
3204       VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3205     }
3206
3207   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3208     return false;
3209
3210   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3211 }
3212
3213 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3214    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3215    variable is omitted.  Compute the cost for a memory reference that accesses
3216    a memory location of mode MEM_MODE in address space AS.
3217
3218    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3219    size of MEM_MODE / RATIO) is available.  To make this determination, we
3220    look at the size of the increment to be made, which is given in CSTEP.
3221    CSTEP may be zero if the step is unknown.
3222    STMT_AFTER_INC is true iff the statement we're looking at is after the
3223    increment of the original biv.
3224
3225    TODO -- there must be some better way.  This all is quite crude.  */
3226
3227 typedef struct
3228 {
3229   HOST_WIDE_INT min_offset, max_offset;
3230   unsigned costs[2][2][2][2];
3231 } *address_cost_data;
3232
3233 DEF_VEC_P (address_cost_data);
3234 DEF_VEC_ALLOC_P (address_cost_data, heap);
3235
3236 static comp_cost
3237 get_address_cost (bool symbol_present, bool var_present,
3238                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3239                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3240                   addr_space_t as, bool speed,
3241                   bool stmt_after_inc, bool *may_autoinc)
3242 {
3243   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3244   static VEC(address_cost_data, heap) *address_cost_data_list;
3245   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3246   address_cost_data data;
3247   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3248   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3249   unsigned cost, acost, complexity;
3250   bool offset_p, ratio_p, autoinc;
3251   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3252   unsigned HOST_WIDE_INT mask;
3253   unsigned bits;
3254
3255   if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3256     VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3257                            data_index + 1);
3258
3259   data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3260   if (!data)
3261     {
3262       HOST_WIDE_INT i;
3263       HOST_WIDE_INT rat, off = 0;
3264       int old_cse_not_expected, width;
3265       unsigned sym_p, var_p, off_p, rat_p, add_c;
3266       rtx seq, addr, base;
3267       rtx reg0, reg1;
3268
3269       data = (address_cost_data) xcalloc (1, sizeof (*data));
3270
3271       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3272
3273       width = GET_MODE_BITSIZE (address_mode) - 1;
3274       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3275         width = HOST_BITS_PER_WIDE_INT - 1;
3276       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3277
3278       for (i = width; i >= 0; i--)
3279         {
3280           off = -((HOST_WIDE_INT) 1 << i);
3281           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3282           if (memory_address_addr_space_p (mem_mode, addr, as))
3283             break;
3284         }
3285       data->min_offset = (i == -1? 0 : off);
3286
3287       for (i = width; i >= 0; i--)
3288         {
3289           off = ((HOST_WIDE_INT) 1 << i) - 1;
3290           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3291           if (memory_address_addr_space_p (mem_mode, addr, as))
3292             break;
3293         }
3294       if (i == -1)
3295         off = 0;
3296       data->max_offset = off;
3297
3298       if (dump_file && (dump_flags & TDF_DETAILS))
3299         {
3300           fprintf (dump_file, "get_address_cost:\n");
3301           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3302                    GET_MODE_NAME (mem_mode),
3303                    data->min_offset);
3304           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3305                    GET_MODE_NAME (mem_mode),
3306                    data->max_offset);
3307         }
3308
3309       rat = 1;
3310       for (i = 2; i <= MAX_RATIO; i++)
3311         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3312           {
3313             rat = i;
3314             break;
3315           }
3316
3317       /* Compute the cost of various addressing modes.  */
3318       acost = 0;
3319       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3320       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3321
3322       if (HAVE_PRE_DECREMENT)
3323         {
3324           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3325           has_predec[mem_mode]
3326             = memory_address_addr_space_p (mem_mode, addr, as);
3327         }
3328       if (HAVE_POST_DECREMENT)
3329         {
3330           addr = gen_rtx_POST_DEC (address_mode, reg0);
3331           has_postdec[mem_mode]
3332             = memory_address_addr_space_p (mem_mode, addr, as);
3333         }
3334       if (HAVE_PRE_INCREMENT)
3335         {
3336           addr = gen_rtx_PRE_INC (address_mode, reg0);
3337           has_preinc[mem_mode]
3338             = memory_address_addr_space_p (mem_mode, addr, as);
3339         }
3340       if (HAVE_POST_INCREMENT)
3341         {
3342           addr = gen_rtx_POST_INC (address_mode, reg0);
3343           has_postinc[mem_mode]
3344             = memory_address_addr_space_p (mem_mode, addr, as);
3345         }
3346       for (i = 0; i < 16; i++)
3347         {
3348           sym_p = i & 1;
3349           var_p = (i >> 1) & 1;
3350           off_p = (i >> 2) & 1;
3351           rat_p = (i >> 3) & 1;
3352
3353           addr = reg0;
3354           if (rat_p)
3355             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3356                                    gen_int_mode (rat, address_mode));
3357
3358           if (var_p)
3359             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3360
3361           if (sym_p)
3362             {
3363               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3364               /* ??? We can run into trouble with some backends by presenting
3365                  it with symbols which haven't been properly passed through
3366                  targetm.encode_section_info.  By setting the local bit, we
3367                  enhance the probability of things working.  */
3368               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3369
3370               if (off_p)
3371                 base = gen_rtx_fmt_e (CONST, address_mode,
3372                                       gen_rtx_fmt_ee
3373                                         (PLUS, address_mode, base,
3374                                          gen_int_mode (off, address_mode)));
3375             }
3376           else if (off_p)
3377             base = gen_int_mode (off, address_mode);
3378           else
3379             base = NULL_RTX;
3380
3381           if (base)
3382             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3383
3384           start_sequence ();
3385           /* To avoid splitting addressing modes, pretend that no cse will
3386              follow.  */
3387           old_cse_not_expected = cse_not_expected;
3388           cse_not_expected = true;
3389           addr = memory_address_addr_space (mem_mode, addr, as);
3390           cse_not_expected = old_cse_not_expected;
3391           seq = get_insns ();
3392           end_sequence ();
3393
3394           acost = seq_cost (seq, speed);
3395           acost += address_cost (addr, mem_mode, as, speed);
3396
3397           if (!acost)
3398             acost = 1;
3399           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3400         }
3401
3402       /* On some targets, it is quite expensive to load symbol to a register,
3403          which makes addresses that contain symbols look much more expensive.
3404          However, the symbol will have to be loaded in any case before the
3405          loop (and quite likely we have it in register already), so it does not
3406          make much sense to penalize them too heavily.  So make some final
3407          tweaks for the SYMBOL_PRESENT modes:
3408
3409          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3410          var is cheaper, use this mode with small penalty.
3411          If VAR_PRESENT is true, try whether the mode with
3412          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3413          if this is the case, use it.  */
3414       add_c = add_cost (address_mode, speed);
3415       for (i = 0; i < 8; i++)
3416         {
3417           var_p = i & 1;
3418           off_p = (i >> 1) & 1;
3419           rat_p = (i >> 2) & 1;
3420
3421           acost = data->costs[0][1][off_p][rat_p] + 1;
3422           if (var_p)
3423             acost += add_c;
3424
3425           if (acost < data->costs[1][var_p][off_p][rat_p])
3426             data->costs[1][var_p][off_p][rat_p] = acost;
3427         }
3428
3429       if (dump_file && (dump_flags & TDF_DETAILS))
3430         {
3431           fprintf (dump_file, "Address costs:\n");
3432
3433           for (i = 0; i < 16; i++)
3434             {
3435               sym_p = i & 1;
3436               var_p = (i >> 1) & 1;
3437               off_p = (i >> 2) & 1;
3438               rat_p = (i >> 3) & 1;
3439
3440               fprintf (dump_file, "  ");
3441               if (sym_p)
3442                 fprintf (dump_file, "sym + ");
3443               if (var_p)
3444                 fprintf (dump_file, "var + ");
3445               if (off_p)
3446                 fprintf (dump_file, "cst + ");
3447               if (rat_p)
3448                 fprintf (dump_file, "rat * ");
3449
3450               acost = data->costs[sym_p][var_p][off_p][rat_p];
3451               fprintf (dump_file, "index costs %d\n", acost);
3452             }
3453           if (has_predec[mem_mode] || has_postdec[mem_mode]
3454               || has_preinc[mem_mode] || has_postinc[mem_mode])
3455             fprintf (dump_file, "  May include autoinc/dec\n");
3456           fprintf (dump_file, "\n");
3457         }
3458
3459       VEC_replace (address_cost_data, address_cost_data_list,
3460                    data_index, data);
3461     }
3462
3463   bits = GET_MODE_BITSIZE (address_mode);
3464   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3465   offset &= mask;
3466   if ((offset >> (bits - 1) & 1))
3467     offset |= ~mask;
3468   s_offset = offset;
3469
3470   autoinc = false;
3471   msize = GET_MODE_SIZE (mem_mode);
3472   autoinc_offset = offset;
3473   if (stmt_after_inc)
3474     autoinc_offset += ratio * cstep;
3475   if (symbol_present || var_present || ratio != 1)
3476     autoinc = false;
3477   else if ((has_postinc[mem_mode] && autoinc_offset == 0
3478                && msize == cstep)
3479            || (has_postdec[mem_mode] && autoinc_offset == 0
3480                && msize == -cstep)
3481            || (has_preinc[mem_mode] && autoinc_offset == msize
3482                && msize == cstep)
3483            || (has_predec[mem_mode] && autoinc_offset == -msize
3484                && msize == -cstep))
3485     autoinc = true;
3486
3487   cost = 0;
3488   offset_p = (s_offset != 0
3489               && data->min_offset <= s_offset
3490               && s_offset <= data->max_offset);
3491   ratio_p = (ratio != 1
3492              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3493
3494   if (ratio != 1 && !ratio_p)
3495     cost += multiply_by_cost (ratio, address_mode, speed);
3496
3497   if (s_offset && !offset_p && !symbol_present)
3498     cost += add_cost (address_mode, speed);
3499
3500   if (may_autoinc)
3501     *may_autoinc = autoinc;
3502   acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3503   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3504   return new_cost (cost + acost, complexity);
3505 }
3506
3507 /* Estimates cost of forcing expression EXPR into a variable.  */
3508
3509 static comp_cost
3510 force_expr_to_var_cost (tree expr, bool speed)
3511 {
3512   static bool costs_initialized = false;
3513   static unsigned integer_cost [2];
3514   static unsigned symbol_cost [2];
3515   static unsigned address_cost [2];
3516   tree op0, op1;
3517   comp_cost cost0, cost1, cost;
3518   enum machine_mode mode;
3519
3520   if (!costs_initialized)
3521     {
3522       tree type = build_pointer_type (integer_type_node);
3523       tree var, addr;
3524       rtx x;
3525       int i;
3526
3527       var = create_tmp_var_raw (integer_type_node, "test_var");
3528       TREE_STATIC (var) = 1;
3529       x = produce_memory_decl_rtl (var, NULL);
3530       SET_DECL_RTL (var, x);
3531
3532       addr = build1 (ADDR_EXPR, type, var);
3533
3534
3535       for (i = 0; i < 2; i++)
3536         {
3537           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3538                                                              2000), i);
3539
3540           symbol_cost[i] = computation_cost (addr, i) + 1;
3541
3542           address_cost[i]
3543             = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3544                                         addr,
3545                                         build_int_cst (sizetype, 2000)), i) + 1;
3546           if (dump_file && (dump_flags & TDF_DETAILS))
3547             {
3548               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3549               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3550               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3551               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3552               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3553               fprintf (dump_file, "\n");
3554             }
3555         }
3556
3557       costs_initialized = true;
3558     }
3559
3560   STRIP_NOPS (expr);
3561
3562   if (SSA_VAR_P (expr))
3563     return zero_cost;
3564
3565   if (is_gimple_min_invariant (expr))
3566     {
3567       if (TREE_CODE (expr) == INTEGER_CST)
3568         return new_cost (integer_cost [speed], 0);
3569
3570       if (TREE_CODE (expr) == ADDR_EXPR)
3571         {
3572           tree obj = TREE_OPERAND (expr, 0);
3573
3574           if (TREE_CODE (obj) == VAR_DECL
3575               || TREE_CODE (obj) == PARM_DECL
3576               || TREE_CODE (obj) == RESULT_DECL)
3577             return new_cost (symbol_cost [speed], 0);
3578         }
3579
3580       return new_cost (address_cost [speed], 0);
3581     }
3582
3583   switch (TREE_CODE (expr))
3584     {
3585     case POINTER_PLUS_EXPR:
3586     case PLUS_EXPR:
3587     case MINUS_EXPR:
3588     case MULT_EXPR:
3589       op0 = TREE_OPERAND (expr, 0);
3590       op1 = TREE_OPERAND (expr, 1);
3591       STRIP_NOPS (op0);
3592       STRIP_NOPS (op1);
3593
3594       if (is_gimple_val (op0))
3595         cost0 = zero_cost;
3596       else
3597         cost0 = force_expr_to_var_cost (op0, speed);
3598
3599       if (is_gimple_val (op1))
3600         cost1 = zero_cost;
3601       else
3602         cost1 = force_expr_to_var_cost (op1, speed);
3603
3604       break;
3605
3606     case NEGATE_EXPR:
3607       op0 = TREE_OPERAND (expr, 0);
3608       STRIP_NOPS (op0);
3609       op1 = NULL_TREE;
3610
3611       if (is_gimple_val (op0))
3612         cost0 = zero_cost;
3613       else
3614         cost0 = force_expr_to_var_cost (op0, speed);
3615
3616       cost1 = zero_cost;
3617       break;
3618
3619     default:
3620       /* Just an arbitrary value, FIXME.  */
3621       return new_cost (target_spill_cost[speed], 0);
3622     }
3623
3624   mode = TYPE_MODE (TREE_TYPE (expr));
3625   switch (TREE_CODE (expr))
3626     {
3627     case POINTER_PLUS_EXPR:
3628     case PLUS_EXPR:
3629     case MINUS_EXPR:
3630     case NEGATE_EXPR:
3631       cost = new_cost (add_cost (mode, speed), 0);
3632       break;
3633
3634     case MULT_EXPR:
3635       if (cst_and_fits_in_hwi (op0))
3636         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3637       else if (cst_and_fits_in_hwi (op1))
3638         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3639       else
3640         return new_cost (target_spill_cost [speed], 0);
3641       break;
3642
3643     default:
3644       gcc_unreachable ();
3645     }
3646
3647   cost = add_costs (cost, cost0);
3648   cost = add_costs (cost, cost1);
3649
3650   /* Bound the cost by target_spill_cost.  The parts of complicated
3651      computations often are either loop invariant or at least can
3652      be shared between several iv uses, so letting this grow without
3653      limits would not give reasonable results.  */
3654   if (cost.cost > (int) target_spill_cost [speed])
3655     cost.cost = target_spill_cost [speed];
3656
3657   return cost;
3658 }
3659
3660 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3661    invariants the computation depends on.  */
3662
3663 static comp_cost
3664 force_var_cost (struct ivopts_data *data,
3665                 tree expr, bitmap *depends_on)
3666 {
3667   if (depends_on)
3668     {
3669       fd_ivopts_data = data;
3670       walk_tree (&expr, find_depends, depends_on, NULL);
3671     }
3672
3673   return force_expr_to_var_cost (expr, data->speed);
3674 }
3675
3676 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3677    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3678    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3679    invariants the computation depends on.  */
3680
3681 static comp_cost
3682 split_address_cost (struct ivopts_data *data,
3683                     tree addr, bool *symbol_present, bool *var_present,
3684                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3685 {
3686   tree core;
3687   HOST_WIDE_INT bitsize;
3688   HOST_WIDE_INT bitpos;
3689   tree toffset;
3690   enum machine_mode mode;
3691   int unsignedp, volatilep;
3692
3693   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3694                               &unsignedp, &volatilep, false);
3695
3696   if (toffset != 0
3697       || bitpos % BITS_PER_UNIT != 0
3698       || TREE_CODE (core) != VAR_DECL)
3699     {
3700       *symbol_present = false;
3701       *var_present = true;
3702       fd_ivopts_data = data;
3703       walk_tree (&addr, find_depends, depends_on, NULL);
3704       return new_cost (target_spill_cost[data->speed], 0);
3705     }
3706
3707   *offset += bitpos / BITS_PER_UNIT;
3708   if (TREE_STATIC (core)
3709       || DECL_EXTERNAL (core))
3710     {
3711       *symbol_present = true;
3712       *var_present = false;
3713       return zero_cost;
3714     }
3715
3716   *symbol_present = false;
3717   *var_present = true;
3718   return zero_cost;
3719 }
3720
3721 /* Estimates cost of expressing difference of addresses E1 - E2 as
3722    var + symbol + offset.  The value of offset is added to OFFSET,
3723    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3724    part is missing.  DEPENDS_ON is a set of the invariants the computation
3725    depends on.  */
3726
3727 static comp_cost
3728 ptr_difference_cost (struct ivopts_data *data,
3729                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3730                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3731 {
3732   HOST_WIDE_INT diff = 0;
3733   aff_tree aff_e1, aff_e2;
3734   tree type;
3735
3736   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3737
3738   if (ptr_difference_const (e1, e2, &diff))
3739     {
3740       *offset += diff;
3741       *symbol_present = false;
3742       *var_present = false;
3743       return zero_cost;
3744     }
3745
3746   if (integer_zerop (e2))
3747     return split_address_cost (data, TREE_OPERAND (e1, 0),
3748                                symbol_present, var_present, offset, depends_on);
3749
3750   *symbol_present = false;
3751   *var_present = true;
3752
3753   type = signed_type_for (TREE_TYPE (e1));
3754   tree_to_aff_combination (e1, type, &aff_e1);
3755   tree_to_aff_combination (e2, type, &aff_e2);
3756   aff_combination_scale (&aff_e2, double_int_minus_one);
3757   aff_combination_add (&aff_e1, &aff_e2);
3758
3759   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3760 }
3761
3762 /* Estimates cost of expressing difference E1 - E2 as
3763    var + symbol + offset.  The value of offset is added to OFFSET,
3764    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3765    part is missing.  DEPENDS_ON is a set of the invariants the computation
3766    depends on.  */
3767
3768 static comp_cost
3769 difference_cost (struct ivopts_data *data,
3770                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3771                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3772 {
3773   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3774   unsigned HOST_WIDE_INT off1, off2;
3775   aff_tree aff_e1, aff_e2;
3776   tree type;
3777
3778   e1 = strip_offset (e1, &off1);
3779   e2 = strip_offset (e2, &off2);
3780   *offset += off1 - off2;
3781
3782   STRIP_NOPS (e1);
3783   STRIP_NOPS (e2);
3784
3785   if (TREE_CODE (e1) == ADDR_EXPR)
3786     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3787                                 offset, depends_on);
3788   *symbol_present = false;
3789
3790   if (operand_equal_p (e1, e2, 0))
3791     {
3792       *var_present = false;
3793       return zero_cost;
3794     }
3795
3796   *var_present = true;
3797
3798   if (integer_zerop (e2))
3799     return force_var_cost (data, e1, depends_on);
3800
3801   if (integer_zerop (e1))
3802     {
3803       comp_cost cost = force_var_cost (data, e2, depends_on);
3804       cost.cost += multiply_by_cost (-1, mode, data->speed);
3805       return cost;
3806     }
3807
3808   type = signed_type_for (TREE_TYPE (e1));
3809   tree_to_aff_combination (e1, type, &aff_e1);
3810   tree_to_aff_combination (e2, type, &aff_e2);
3811   aff_combination_scale (&aff_e2, double_int_minus_one);
3812   aff_combination_add (&aff_e1, &aff_e2);
3813
3814   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3815 }
3816
3817 /* Returns true if AFF1 and AFF2 are identical.  */
3818
3819 static bool
3820 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3821 {
3822   unsigned i;
3823
3824   if (aff1->n != aff2->n)
3825     return false;
3826
3827   for (i = 0; i < aff1->n; i++)
3828     {
3829       if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3830         return false;
3831
3832       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3833         return false;
3834     }
3835   return true;
3836 }
3837
3838 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3839    requires a new compiler generated temporary.  Returns -1 otherwise.
3840    ADDRESS_P is a flag indicating if the expression is for address
3841    computation.  */
3842
3843 static int
3844 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3845                             tree cbase, HOST_WIDE_INT ratio,
3846                             bool address_p)
3847 {
3848   aff_tree ubase_aff, cbase_aff;
3849   tree expr, ub, cb;
3850   struct iv_inv_expr_ent ent;
3851   struct iv_inv_expr_ent **slot;
3852
3853   STRIP_NOPS (ubase);
3854   STRIP_NOPS (cbase);
3855   ub = ubase;
3856   cb = cbase;
3857
3858   if ((TREE_CODE (ubase) == INTEGER_CST)
3859       && (TREE_CODE (cbase) == INTEGER_CST))
3860     return -1;
3861
3862   /* Strips the constant part. */
3863   if (TREE_CODE (ubase) == PLUS_EXPR
3864       || TREE_CODE (ubase) == MINUS_EXPR
3865       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3866     {
3867       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3868         ubase = TREE_OPERAND (ubase, 0);
3869     }
3870
3871   /* Strips the constant part. */
3872   if (TREE_CODE (cbase) == PLUS_EXPR
3873       || TREE_CODE (cbase) == MINUS_EXPR
3874       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3875     {
3876       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3877         cbase = TREE_OPERAND (cbase, 0);
3878     }
3879
3880   if (address_p)
3881     {
3882       if (((TREE_CODE (ubase) == SSA_NAME)
3883            || (TREE_CODE (ubase) == ADDR_EXPR
3884                && is_gimple_min_invariant (ubase)))
3885           && (TREE_CODE (cbase) == INTEGER_CST))
3886         return -1;
3887
3888       if (((TREE_CODE (cbase) == SSA_NAME)
3889            || (TREE_CODE (cbase) == ADDR_EXPR
3890                && is_gimple_min_invariant (cbase)))
3891           && (TREE_CODE (ubase) == INTEGER_CST))
3892         return -1;
3893     }
3894
3895   if (ratio == 1)
3896     {
3897       if(operand_equal_p (ubase, cbase, 0))
3898         return -1;
3899
3900       if (TREE_CODE (ubase) == ADDR_EXPR
3901           && TREE_CODE (cbase) == ADDR_EXPR)
3902         {
3903           tree usym, csym;
3904
3905           usym = TREE_OPERAND (ubase, 0);
3906           csym = TREE_OPERAND (cbase, 0);
3907           if (TREE_CODE (usym) == ARRAY_REF)
3908             {
3909               tree ind = TREE_OPERAND (usym, 1);
3910               if (TREE_CODE (ind) == INTEGER_CST
3911                   && host_integerp (ind, 0)
3912                   && TREE_INT_CST_LOW (ind) == 0)
3913                 usym = TREE_OPERAND (usym, 0);
3914             }
3915           if (TREE_CODE (csym) == ARRAY_REF)
3916             {
3917               tree ind = TREE_OPERAND (csym, 1);
3918               if (TREE_CODE (ind) == INTEGER_CST
3919                   && host_integerp (ind, 0)
3920                   && TREE_INT_CST_LOW (ind) == 0)
3921                 csym = TREE_OPERAND (csym, 0);
3922             }
3923           if (operand_equal_p (usym, csym, 0))
3924             return -1;
3925         }
3926       /* Now do more complex comparison  */
3927       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3928       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3929       if (compare_aff_trees (&ubase_aff, &cbase_aff))
3930         return -1;
3931     }
3932
3933   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3934   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3935
3936   aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3937   aff_combination_add (&ubase_aff, &cbase_aff);
3938   expr = aff_combination_to_tree (&ubase_aff);
3939   ent.expr = expr;
3940   ent.hash = iterative_hash_expr (expr, 0);
3941   slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3942                                                      &ent, INSERT);
3943   if (*slot)
3944     return (*slot)->id;
3945
3946   *slot = XNEW (struct iv_inv_expr_ent);
3947   (*slot)->expr = expr;
3948   (*slot)->hash = ent.hash;
3949   (*slot)->id = data->inv_expr_id++;
3950   return  (*slot)->id;
3951 }
3952
3953
3954
3955 /* Determines the cost of the computation by that USE is expressed
3956    from induction variable CAND.  If ADDRESS_P is true, we just need
3957    to create an address from it, otherwise we want to get it into
3958    register.  A set of invariants we depend on is stored in
3959    DEPENDS_ON.  AT is the statement at that the value is computed.
3960    If CAN_AUTOINC is nonnull, use it to record whether autoinc
3961    addressing is likely.  */
3962
3963 static comp_cost
3964 get_computation_cost_at (struct ivopts_data *data,
3965                          struct iv_use *use, struct iv_cand *cand,
3966                          bool address_p, bitmap *depends_on, gimple at,
3967                          bool *can_autoinc,
3968                          int *inv_expr_id)
3969 {
3970   tree ubase = use->iv->base, ustep = use->iv->step;
3971   tree cbase, cstep;
3972   tree utype = TREE_TYPE (ubase), ctype;
3973   unsigned HOST_WIDE_INT cstepi, offset = 0;
3974   HOST_WIDE_INT ratio, aratio;
3975   bool var_present, symbol_present, stmt_is_after_inc;
3976   comp_cost cost;
3977   double_int rat;
3978   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3979
3980   *depends_on = NULL;
3981
3982   /* Only consider real candidates.  */
3983   if (!cand->iv)
3984     return infinite_cost;
3985
3986   cbase = cand->iv->base;
3987   cstep = cand->iv->step;
3988   ctype = TREE_TYPE (cbase);
3989
3990   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3991     {
3992       /* We do not have a precision to express the values of use.  */
3993       return infinite_cost;
3994     }
3995
3996   if (address_p)
3997     {
3998       /* Do not try to express address of an object with computation based
3999          on address of a different object.  This may cause problems in rtl
4000          level alias analysis (that does not expect this to be happening,
4001          as this is illegal in C), and would be unlikely to be useful
4002          anyway.  */
4003       if (use->iv->base_object
4004           && cand->iv->base_object
4005           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4006         return infinite_cost;
4007     }
4008
4009   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4010     {
4011       /* TODO -- add direct handling of this case.  */
4012       goto fallback;
4013     }
4014
4015   /* CSTEPI is removed from the offset in case statement is after the
4016      increment.  If the step is not constant, we use zero instead.
4017      This is a bit imprecise (there is the extra addition), but
4018      redundancy elimination is likely to transform the code so that
4019      it uses value of the variable before increment anyway,
4020      so it is not that much unrealistic.  */
4021   if (cst_and_fits_in_hwi (cstep))
4022     cstepi = int_cst_value (cstep);
4023   else
4024     cstepi = 0;
4025
4026   if (!constant_multiple_of (ustep, cstep, &rat))
4027     return infinite_cost;
4028
4029   if (double_int_fits_in_shwi_p (rat))
4030     ratio = double_int_to_shwi (rat);
4031   else
4032     return infinite_cost;
4033
4034   STRIP_NOPS (cbase);
4035   ctype = TREE_TYPE (cbase);
4036
4037   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4038
4039   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4040      or ratio == 1, it is better to handle this like
4041
4042      ubase - ratio * cbase + ratio * var
4043
4044      (also holds in the case ratio == -1, TODO.  */
4045
4046   if (cst_and_fits_in_hwi (cbase))
4047     {
4048       offset = - ratio * int_cst_value (cbase);
4049       cost = difference_cost (data,
4050                               ubase, build_int_cst (utype, 0),
4051                               &symbol_present, &var_present, &offset,
4052                               depends_on);
4053       cost.cost /= avg_loop_niter (data->current_loop);
4054     }
4055   else if (ratio == 1)
4056     {
4057       tree real_cbase = cbase;
4058
4059       /* Check to see if any adjustment is needed.  */
4060       if (cstepi == 0 && stmt_is_after_inc)
4061         {
4062           aff_tree real_cbase_aff;
4063           aff_tree cstep_aff;
4064
4065           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4066                                    &real_cbase_aff);
4067           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4068
4069           aff_combination_add (&real_cbase_aff, &cstep_aff);
4070           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4071         }
4072
4073       cost = difference_cost (data,
4074                               ubase, real_cbase,
4075                               &symbol_present, &var_present, &offset,
4076                               depends_on);
4077       cost.cost /= avg_loop_niter (data->current_loop);
4078     }
4079   else if (address_p
4080            && !POINTER_TYPE_P (ctype)
4081            && multiplier_allowed_in_address_p
4082                 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4083                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4084     {
4085       cbase
4086         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4087       cost = difference_cost (data,
4088                               ubase, cbase,
4089                               &symbol_present, &var_present, &offset,
4090                               depends_on);
4091       cost.cost /= avg_loop_niter (data->current_loop);
4092     }
4093   else
4094     {
4095       cost = force_var_cost (data, cbase, depends_on);
4096       cost = add_costs (cost,
4097                         difference_cost (data,
4098                                          ubase, build_int_cst (utype, 0),
4099                                          &symbol_present, &var_present,
4100                                          &offset, depends_on));
4101       cost.cost /= avg_loop_niter (data->current_loop);
4102       cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4103     }
4104
4105   if (inv_expr_id)
4106     {
4107       *inv_expr_id =
4108           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4109       /* Clear depends on.  */
4110       if (*inv_expr_id != -1 && depends_on && *depends_on)
4111         bitmap_clear (*depends_on);
4112     }
4113
4114   /* If we are after the increment, the value of the candidate is higher by
4115      one iteration.  */
4116   if (stmt_is_after_inc)
4117     offset -= ratio * cstepi;
4118
4119   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4120      (symbol/var1/const parts may be omitted).  If we are looking for an
4121      address, find the cost of addressing this.  */
4122   if (address_p)
4123     return add_costs (cost,
4124                       get_address_cost (symbol_present, var_present,
4125                                         offset, ratio, cstepi,
4126                                         TYPE_MODE (TREE_TYPE (utype)),
4127                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4128                                         speed, stmt_is_after_inc,
4129                                         can_autoinc));
4130
4131   /* Otherwise estimate the costs for computing the expression.  */
4132   if (!symbol_present && !var_present && !offset)
4133     {
4134       if (ratio != 1)
4135         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4136       return cost;
4137     }
4138
4139   /* Symbol + offset should be compile-time computable so consider that they
4140       are added once to the variable, if present.  */
4141   if (var_present && (symbol_present || offset))
4142     cost.cost += adjust_setup_cost (data,
4143                                     add_cost (TYPE_MODE (ctype), speed));
4144
4145   /* Having offset does not affect runtime cost in case it is added to
4146      symbol, but it increases complexity.  */
4147   if (offset)
4148     cost.complexity++;
4149
4150   cost.cost += add_cost (TYPE_MODE (ctype), speed);
4151
4152   aratio = ratio > 0 ? ratio : -ratio;
4153   if (aratio != 1)
4154     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4155   return cost;
4156
4157 fallback:
4158   if (can_autoinc)
4159     *can_autoinc = false;
4160
4161   {
4162     /* Just get the expression, expand it and measure the cost.  */
4163     tree comp = get_computation_at (data->current_loop, use, cand, at);
4164
4165     if (!comp)
4166       return infinite_cost;
4167
4168     if (address_p)
4169       comp = build_simple_mem_ref (comp);
4170
4171     return new_cost (computation_cost (comp, speed), 0);
4172   }
4173 }
4174
4175 /* Determines the cost of the computation by that USE is expressed
4176    from induction variable CAND.  If ADDRESS_P is true, we just need
4177    to create an address from it, otherwise we want to get it into
4178    register.  A set of invariants we depend on is stored in
4179    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4180    autoinc addressing is likely.  */
4181
4182 static comp_cost
4183 get_computation_cost (struct ivopts_data *data,
4184                       struct iv_use *use, struct iv_cand *cand,
4185                       bool address_p, bitmap *depends_on,
4186                       bool *can_autoinc, int *inv_expr_id)
4187 {
4188   return get_computation_cost_at (data,
4189                                   use, cand, address_p, depends_on, use->stmt,
4190                                   can_autoinc, inv_expr_id);
4191 }
4192
4193 /* Determines cost of basing replacement of USE on CAND in a generic
4194    expression.  */
4195
4196 static bool
4197 determine_use_iv_cost_generic (struct ivopts_data *data,
4198                                struct iv_use *use, struct iv_cand *cand)
4199 {
4200   bitmap depends_on;
4201   comp_cost cost;
4202   int inv_expr_id = -1;
4203
4204   /* The simple case first -- if we need to express value of the preserved
4205      original biv, the cost is 0.  This also prevents us from counting the
4206      cost of increment twice -- once at this use and once in the cost of
4207      the candidate.  */
4208   if (cand->pos == IP_ORIGINAL
4209       && cand->incremented_at == use->stmt)
4210     {
4211       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4212       return true;
4213     }
4214
4215   cost = get_computation_cost (data, use, cand, false, &depends_on,
4216                                NULL, &inv_expr_id);
4217
4218   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4219                    inv_expr_id);
4220
4221   return !infinite_cost_p (cost);
4222 }
4223
4224 /* Determines cost of basing replacement of USE on CAND in an address.  */
4225
4226 static bool
4227 determine_use_iv_cost_address (struct ivopts_data *data,
4228                                struct iv_use *use, struct iv_cand *cand)
4229 {
4230   bitmap depends_on;
4231   bool can_autoinc;
4232   int inv_expr_id = -1;
4233   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4234                                          &can_autoinc, &inv_expr_id);
4235
4236   if (cand->ainc_use == use)
4237     {
4238       if (can_autoinc)
4239         cost.cost -= cand->cost_step;
4240       /* If we generated the candidate solely for exploiting autoincrement
4241          opportunities, and it turns out it can't be used, set the cost to
4242          infinity to make sure we ignore it.  */
4243       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4244         cost = infinite_cost;
4245     }
4246   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4247                    inv_expr_id);
4248
4249   return !infinite_cost_p (cost);
4250 }
4251
4252 /* Computes value of candidate CAND at position AT in iteration NITER, and
4253    stores it to VAL.  */
4254
4255 static void
4256 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4257                aff_tree *val)
4258 {
4259   aff_tree step, delta, nit;
4260   struct iv *iv = cand->iv;
4261   tree type = TREE_TYPE (iv->base);
4262   tree steptype = type;
4263   if (POINTER_TYPE_P (type))
4264     steptype = sizetype;
4265
4266   tree_to_aff_combination (iv->step, steptype, &step);
4267   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4268   aff_combination_convert (&nit, steptype);
4269   aff_combination_mult (&nit, &step, &delta);
4270   if (stmt_after_increment (loop, cand, at))
4271     aff_combination_add (&delta, &step);
4272
4273   tree_to_aff_combination (iv->base, type, val);
4274   aff_combination_add (val, &delta);
4275 }
4276
4277 /* Returns period of induction variable iv.  */
4278
4279 static tree
4280 iv_period (struct iv *iv)
4281 {
4282   tree step = iv->step, period, type;
4283   tree pow2div;
4284
4285   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4286
4287   type = unsigned_type_for (TREE_TYPE (step));
4288   /* Period of the iv is lcm (step, type_range)/step -1,
4289      i.e., N*type_range/step - 1. Since type range is power
4290      of two, N == (step >> num_of_ending_zeros_binary (step),
4291      so the final result is
4292
4293        (type_range >> num_of_ending_zeros_binary (step)) - 1
4294
4295   */
4296   pow2div = num_ending_zeros (step);
4297
4298   period = build_low_bits_mask (type,
4299                                 (TYPE_PRECISION (type)
4300                                  - tree_low_cst (pow2div, 1)));
4301
4302   return period;
4303 }
4304
4305 /* Returns the comparison operator used when eliminating the iv USE.  */
4306
4307 static enum tree_code
4308 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4309 {
4310   struct loop *loop = data->current_loop;
4311   basic_block ex_bb;
4312   edge exit;
4313
4314   ex_bb = gimple_bb (use->stmt);
4315   exit = EDGE_SUCC (ex_bb, 0);
4316   if (flow_bb_inside_loop_p (loop, exit->dest))
4317     exit = EDGE_SUCC (ex_bb, 1);
4318
4319   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4320 }
4321
4322 /* Check whether it is possible to express the condition in USE by comparison
4323    of candidate CAND.  If so, store the value compared with to BOUND.  */
4324
4325 static bool
4326 may_eliminate_iv (struct ivopts_data *data,
4327                   struct iv_use *use, struct iv_cand *cand, tree *bound)
4328 {
4329   basic_block ex_bb;
4330   edge exit;
4331   tree nit, period;
4332   struct loop *loop = data->current_loop;
4333   aff_tree bnd;
4334   struct tree_niter_desc *desc = NULL;
4335
4336   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4337     return false;
4338
4339   /* For now works only for exits that dominate the loop latch.
4340      TODO: extend to other conditions inside loop body.  */
4341   ex_bb = gimple_bb (use->stmt);
4342   if (use->stmt != last_stmt (ex_bb)
4343       || gimple_code (use->stmt) != GIMPLE_COND
4344       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4345     return false;
4346
4347   exit = EDGE_SUCC (ex_bb, 0);
4348   if (flow_bb_inside_loop_p (loop, exit->dest))
4349     exit = EDGE_SUCC (ex_bb, 1);
4350   if (flow_bb_inside_loop_p (loop, exit->dest))
4351     return false;
4352
4353   nit = niter_for_exit (data, exit, &desc);
4354   if (!nit)
4355     return false;
4356
4357   /* Determine whether we can use the variable to test the exit condition.
4358      This is the case iff the period of the induction variable is greater
4359      than the number of iterations for which the exit condition is true.  */
4360   period = iv_period (cand->iv);
4361
4362   /* If the number of iterations is constant, compare against it directly.  */
4363   if (TREE_CODE (nit) == INTEGER_CST)
4364     {
4365       /* See cand_value_at.  */
4366       if (stmt_after_increment (loop, cand, use->stmt))
4367         {
4368           if (!tree_int_cst_lt (nit, period))
4369             return false;
4370         }
4371       else
4372         {
4373           if (tree_int_cst_lt (period, nit))
4374             return false;
4375         }
4376     }
4377
4378   /* If not, and if this is the only possible exit of the loop, see whether
4379      we can get a conservative estimate on the number of iterations of the
4380      entire loop and compare against that instead.  */
4381   else
4382     {
4383       double_int period_value, max_niter;
4384
4385       max_niter = desc->max;
4386       if (stmt_after_increment (loop, cand, use->stmt))
4387         max_niter = double_int_add (max_niter, double_int_one);
4388       period_value = tree_to_double_int (period);
4389       if (double_int_ucmp (max_niter, period_value) > 0)
4390         {
4391           /* See if we can take advantage of infered loop bound information.  */
4392           if (loop_only_exit_p (loop, exit))
4393             {
4394               if (!estimated_loop_iterations (loop, true, &max_niter))
4395                 return false;
4396               /* The loop bound is already adjusted by adding 1.  */
4397               if (double_int_ucmp (max_niter, period_value) > 0)
4398                 return false;
4399             }
4400           else
4401             return false;
4402         }
4403     }
4404
4405   cand_value_at (loop, cand, use->stmt, nit, &bnd);
4406
4407   *bound = aff_combination_to_tree (&bnd);
4408   /* It is unlikely that computing the number of iterations using division
4409      would be more profitable than keeping the original induction variable.  */
4410   if (expression_expensive_p (*bound))
4411     return false;
4412   return true;
4413 }
4414
4415
4416 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4417
4418 static bool
4419 determine_use_iv_cost_condition (struct ivopts_data *data,
4420                                  struct iv_use *use, struct iv_cand *cand)
4421 {
4422   tree bound = NULL_TREE;
4423   struct iv *cmp_iv;
4424   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4425   comp_cost elim_cost, express_cost, cost;
4426   bool ok;
4427   int inv_expr_id = -1;
4428   tree *control_var, *bound_cst;
4429
4430   /* Only consider real candidates.  */
4431   if (!cand->iv)
4432     {
4433       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4434       return false;
4435     }
4436
4437   /* Try iv elimination.  */
4438   if (may_eliminate_iv (data, use, cand, &bound))
4439     {
4440       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4441       /* The bound is a loop invariant, so it will be only computed
4442          once.  */
4443       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4444     }
4445   else
4446     elim_cost = infinite_cost;
4447
4448   /* Try expressing the original giv.  If it is compared with an invariant,
4449      note that we cannot get rid of it.  */
4450   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4451                               NULL, &cmp_iv);
4452   gcc_assert (ok);
4453
4454   /* When the condition is a comparison of the candidate IV against
4455      zero, prefer this IV.
4456
4457      TODO: The constant that we're substracting from the cost should
4458      be target-dependent.  This information should be added to the
4459      target costs for each backend.  */
4460   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4461       && integer_zerop (*bound_cst)
4462       && (operand_equal_p (*control_var, cand->var_after, 0)
4463           || operand_equal_p (*control_var, cand->var_before, 0)))
4464     elim_cost.cost -= 1;
4465
4466   express_cost = get_computation_cost (data, use, cand, false,
4467                                        &depends_on_express, NULL,
4468                                        &inv_expr_id);
4469   fd_ivopts_data = data;
4470   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4471
4472   /* Choose the better approach, preferring the eliminated IV. */
4473   if (compare_costs (elim_cost, express_cost) <= 0)
4474     {
4475       cost = elim_cost;
4476       depends_on = depends_on_elim;
4477       depends_on_elim = NULL;
4478     }
4479   else
4480     {
4481       cost = express_cost;
4482       depends_on = depends_on_express;
4483       depends_on_express = NULL;
4484       bound = NULL_TREE;
4485     }
4486
4487   set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4488
4489   if (depends_on_elim)
4490     BITMAP_FREE (depends_on_elim);
4491   if (depends_on_express)
4492     BITMAP_FREE (depends_on_express);
4493
4494   return !infinite_cost_p (cost);
4495 }
4496
4497 /* Determines cost of basing replacement of USE on CAND.  Returns false
4498    if USE cannot be based on CAND.  */
4499
4500 static bool
4501 determine_use_iv_cost (struct ivopts_data *data,
4502                        struct iv_use *use, struct iv_cand *cand)
4503 {
4504   switch (use->type)
4505     {
4506     case USE_NONLINEAR_EXPR:
4507       return determine_use_iv_cost_generic (data, use, cand);
4508
4509     case USE_ADDRESS:
4510       return determine_use_iv_cost_address (data, use, cand);
4511
4512     case USE_COMPARE:
4513       return determine_use_iv_cost_condition (data, use, cand);
4514
4515     default:
4516       gcc_unreachable ();
4517     }
4518 }
4519
4520 /* Return true if get_computation_cost indicates that autoincrement is
4521    a possibility for the pair of USE and CAND, false otherwise.  */
4522
4523 static bool
4524 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4525                            struct iv_cand *cand)
4526 {
4527   bitmap depends_on;
4528   bool can_autoinc;
4529   comp_cost cost;
4530
4531   if (use->type != USE_ADDRESS)
4532     return false;
4533
4534   cost = get_computation_cost (data, use, cand, true, &depends_on,
4535                                &can_autoinc, NULL);
4536
4537   BITMAP_FREE (depends_on);
4538
4539   return !infinite_cost_p (cost) && can_autoinc;
4540 }
4541
4542 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4543    use that allows autoincrement, and set their AINC_USE if possible.  */
4544
4545 static void
4546 set_autoinc_for_original_candidates (struct ivopts_data *data)
4547 {
4548   unsigned i, j;
4549
4550   for (i = 0; i < n_iv_cands (data); i++)
4551     {
4552       struct iv_cand *cand = iv_cand (data, i);
4553       struct iv_use *closest = NULL;
4554       if (cand->pos != IP_ORIGINAL)
4555         continue;
4556       for (j = 0; j < n_iv_uses (data); j++)
4557         {
4558           struct iv_use *use = iv_use (data, j);
4559           unsigned uid = gimple_uid (use->stmt);
4560           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4561               || uid > gimple_uid (cand->incremented_at))
4562             continue;
4563           if (closest == NULL || uid > gimple_uid (closest->stmt))
4564             closest = use;
4565         }
4566       if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4567         continue;
4568       cand->ainc_use = closest;
4569     }
4570 }
4571
4572 /* Finds the candidates for the induction variables.  */
4573
4574 static void
4575 find_iv_candidates (struct ivopts_data *data)
4576 {
4577   /* Add commonly used ivs.  */
4578   add_standard_iv_candidates (data);
4579
4580   /* Add old induction variables.  */
4581   add_old_ivs_candidates (data);
4582
4583   /* Add induction variables derived from uses.  */
4584   add_derived_ivs_candidates (data);
4585
4586   set_autoinc_for_original_candidates (data);
4587
4588   /* Record the important candidates.  */
4589   record_important_candidates (data);
4590 }
4591
4592 /* Determines costs of basing the use of the iv on an iv candidate.  */
4593
4594 static void
4595 determine_use_iv_costs (struct ivopts_data *data)
4596 {
4597   unsigned i, j;
4598   struct iv_use *use;
4599   struct iv_cand *cand;
4600   bitmap to_clear = BITMAP_ALLOC (NULL);
4601
4602   alloc_use_cost_map (data);
4603
4604   for (i = 0; i < n_iv_uses (data); i++)
4605     {
4606       use = iv_use (data, i);
4607
4608       if (data->consider_all_candidates)
4609         {
4610           for (j = 0; j < n_iv_cands (data); j++)
4611             {
4612               cand = iv_cand (data, j);
4613               determine_use_iv_cost (data, use, cand);
4614             }
4615         }
4616       else
4617         {
4618           bitmap_iterator bi;
4619
4620           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4621             {
4622               cand = iv_cand (data, j);
4623               if (!determine_use_iv_cost (data, use, cand))
4624                 bitmap_set_bit (to_clear, j);
4625             }
4626
4627           /* Remove the candidates for that the cost is infinite from
4628              the list of related candidates.  */
4629           bitmap_and_compl_into (use->related_cands, to_clear);
4630           bitmap_clear (to_clear);
4631         }
4632     }
4633
4634   BITMAP_FREE (to_clear);
4635
4636   if (dump_file && (dump_flags & TDF_DETAILS))
4637     {
4638       fprintf (dump_file, "Use-candidate costs:\n");
4639
4640       for (i = 0; i < n_iv_uses (data); i++)
4641         {
4642           use = iv_use (data, i);
4643
4644           fprintf (dump_file, "Use %d:\n", i);
4645           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
4646           for (j = 0; j < use->n_map_members; j++)
4647             {
4648               if (!use->cost_map[j].cand
4649                   || infinite_cost_p (use->cost_map[j].cost))
4650                 continue;
4651
4652               fprintf (dump_file, "  %d\t%d\t%d\t",
4653                        use->cost_map[j].cand->id,
4654                        use->cost_map[j].cost.cost,
4655                        use->cost_map[j].cost.complexity);
4656               if (use->cost_map[j].depends_on)
4657                 bitmap_print (dump_file,
4658                               use->cost_map[j].depends_on, "","");
4659               if (use->cost_map[j].inv_expr_id != -1)
4660                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4661               fprintf (dump_file, "\n");
4662             }
4663
4664           fprintf (dump_file, "\n");
4665         }
4666       fprintf (dump_file, "\n");
4667     }
4668 }
4669
4670 /* Determines cost of the candidate CAND.  */
4671
4672 static void
4673 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4674 {
4675   comp_cost cost_base;
4676   unsigned cost, cost_step;
4677   tree base;
4678
4679   if (!cand->iv)
4680     {
4681       cand->cost = 0;
4682       return;
4683     }
4684
4685   /* There are two costs associated with the candidate -- its increment
4686      and its initialization.  The second is almost negligible for any loop
4687      that rolls enough, so we take it just very little into account.  */
4688
4689   base = cand->iv->base;
4690   cost_base = force_var_cost (data, base, NULL);
4691   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4692
4693   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4694
4695   /* Prefer the original ivs unless we may gain something by replacing it.
4696      The reason is to make debugging simpler; so this is not relevant for
4697      artificial ivs created by other optimization passes.  */
4698   if (cand->pos != IP_ORIGINAL
4699       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4700     cost++;
4701
4702   /* Prefer not to insert statements into latch unless there are some
4703      already (so that we do not create unnecessary jumps).  */
4704   if (cand->pos == IP_END
4705       && empty_block_p (ip_end_pos (data->current_loop)))
4706     cost++;
4707
4708   cand->cost = cost;
4709   cand->cost_step = cost_step;
4710 }
4711
4712 /* Determines costs of computation of the candidates.  */
4713
4714 static void
4715 determine_iv_costs (struct ivopts_data *data)
4716 {
4717   unsigned i;
4718
4719   if (dump_file && (dump_flags & TDF_DETAILS))
4720     {
4721       fprintf (dump_file, "Candidate costs:\n");
4722       fprintf (dump_file, "  cand\tcost\n");
4723     }
4724
4725   for (i = 0; i < n_iv_cands (data); i++)
4726     {
4727       struct iv_cand *cand = iv_cand (data, i);
4728
4729       determine_iv_cost (data, cand);
4730
4731       if (dump_file && (dump_flags & TDF_DETAILS))
4732         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4733     }
4734
4735   if (dump_file && (dump_flags & TDF_DETAILS))
4736     fprintf (dump_file, "\n");
4737 }
4738
4739 /* Calculates cost for having SIZE induction variables.  */
4740
4741 static unsigned
4742 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4743 {
4744   /* We add size to the cost, so that we prefer eliminating ivs
4745      if possible.  */
4746   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4747                                             data->body_includes_call);
4748 }
4749
4750 /* For each size of the induction variable set determine the penalty.  */
4751
4752 static void
4753 determine_set_costs (struct ivopts_data *data)
4754 {
4755   unsigned j, n;
4756   gimple phi;
4757   gimple_stmt_iterator psi;
4758   tree op;
4759   struct loop *loop = data->current_loop;
4760   bitmap_iterator bi;
4761
4762   if (dump_file && (dump_flags & TDF_DETAILS))
4763     {
4764       fprintf (dump_file, "Global costs:\n");
4765       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4766       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
4767       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
4768       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
4769     }
4770
4771   n = 0;
4772   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4773     {
4774       phi = gsi_stmt (psi);
4775       op = PHI_RESULT (phi);
4776
4777       if (!is_gimple_reg (op))
4778         continue;
4779
4780       if (get_iv (data, op))
4781         continue;
4782
4783       n++;
4784     }
4785
4786   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4787     {
4788       struct version_info *info = ver_info (data, j);
4789
4790       if (info->inv_id && info->has_nonlin_use)
4791         n++;
4792     }
4793
4794   data->regs_used = n;
4795   if (dump_file && (dump_flags & TDF_DETAILS))
4796     fprintf (dump_file, "  regs_used %d\n", n);
4797
4798   if (dump_file && (dump_flags & TDF_DETAILS))
4799     {
4800       fprintf (dump_file, "  cost for size:\n");
4801       fprintf (dump_file, "  ivs\tcost\n");
4802       for (j = 0; j <= 2 * target_avail_regs; j++)
4803         fprintf (dump_file, "  %d\t%d\n", j,
4804                  ivopts_global_cost_for_size (data, j));
4805       fprintf (dump_file, "\n");
4806     }
4807 }
4808
4809 /* Returns true if A is a cheaper cost pair than B.  */
4810
4811 static bool
4812 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4813 {
4814   int cmp;
4815
4816   if (!a)
4817     return false;
4818
4819   if (!b)
4820     return true;
4821
4822   cmp = compare_costs (a->cost, b->cost);
4823   if (cmp < 0)
4824     return true;
4825
4826   if (cmp > 0)
4827     return false;
4828
4829   /* In case the costs are the same, prefer the cheaper candidate.  */
4830   if (a->cand->cost < b->cand->cost)
4831     return true;
4832
4833   return false;
4834 }
4835
4836
4837 /* Returns candidate by that USE is expressed in IVS.  */
4838
4839 static struct cost_pair *
4840 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4841 {
4842   return ivs->cand_for_use[use->id];
4843 }
4844
4845 /* Computes the cost field of IVS structure.  */
4846
4847 static void
4848 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4849 {
4850   comp_cost cost = ivs->cand_use_cost;
4851
4852   cost.cost += ivs->cand_cost;
4853
4854   cost.cost += ivopts_global_cost_for_size (data,
4855                                             ivs->n_regs + ivs->num_used_inv_expr);
4856
4857   ivs->cost = cost;
4858 }
4859
4860 /* Remove invariants in set INVS to set IVS.  */
4861
4862 static void
4863 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4864 {
4865   bitmap_iterator bi;
4866   unsigned iid;
4867
4868   if (!invs)
4869     return;
4870
4871   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4872     {
4873       ivs->n_invariant_uses[iid]--;
4874       if (ivs->n_invariant_uses[iid] == 0)
4875         ivs->n_regs--;
4876     }
4877 }
4878
4879 /* Set USE not to be expressed by any candidate in IVS.  */
4880
4881 static void
4882 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4883                  struct iv_use *use)
4884 {
4885   unsigned uid = use->id, cid;
4886   struct cost_pair *cp;
4887
4888   cp = ivs->cand_for_use[uid];
4889   if (!cp)
4890     return;
4891   cid = cp->cand->id;
4892
4893   ivs->bad_uses++;
4894   ivs->cand_for_use[uid] = NULL;
4895   ivs->n_cand_uses[cid]--;
4896
4897   if (ivs->n_cand_uses[cid] == 0)
4898     {
4899       bitmap_clear_bit (ivs->cands, cid);
4900       /* Do not count the pseudocandidates.  */
4901       if (cp->cand->iv)
4902         ivs->n_regs--;
4903       ivs->n_cands--;
4904       ivs->cand_cost -= cp->cand->cost;
4905
4906       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4907     }
4908
4909   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4910
4911   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4912
4913   if (cp->inv_expr_id != -1)
4914     {
4915       ivs->used_inv_expr[cp->inv_expr_id]--;
4916       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
4917         ivs->num_used_inv_expr--;
4918     }
4919   iv_ca_recount_cost (data, ivs);
4920 }
4921
4922 /* Add invariants in set INVS to set IVS.  */
4923
4924 static void
4925 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4926 {
4927   bitmap_iterator bi;
4928   unsigned iid;
4929
4930   if (!invs)
4931     return;
4932
4933   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4934     {
4935       ivs->n_invariant_uses[iid]++;
4936       if (ivs->n_invariant_uses[iid] == 1)
4937         ivs->n_regs++;
4938     }
4939 }
4940
4941 /* Set cost pair for USE in set IVS to CP.  */
4942
4943 static void
4944 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4945               struct iv_use *use, struct cost_pair *cp)
4946 {
4947   unsigned uid = use->id, cid;
4948
4949   if (ivs->cand_for_use[uid] == cp)
4950     return;
4951
4952   if (ivs->cand_for_use[uid])
4953     iv_ca_set_no_cp (data, ivs, use);
4954
4955   if (cp)
4956     {
4957       cid = cp->cand->id;
4958
4959       ivs->bad_uses--;
4960       ivs->cand_for_use[uid] = cp;
4961       ivs->n_cand_uses[cid]++;
4962       if (ivs->n_cand_uses[cid] == 1)
4963         {
4964           bitmap_set_bit (ivs->cands, cid);
4965           /* Do not count the pseudocandidates.  */
4966           if (cp->cand->iv)
4967             ivs->n_regs++;
4968           ivs->n_cands++;
4969           ivs->cand_cost += cp->cand->cost;
4970
4971           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4972         }
4973
4974       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4975       iv_ca_set_add_invariants (ivs, cp->depends_on);
4976
4977       if (cp->inv_expr_id != -1)
4978         {
4979           ivs->used_inv_expr[cp->inv_expr_id]++;
4980           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
4981             ivs->num_used_inv_expr++;
4982         }
4983       iv_ca_recount_cost (data, ivs);
4984     }
4985 }
4986
4987 /* Extend set IVS by expressing USE by some of the candidates in it
4988    if possible. All important candidates will be considered
4989    if IMPORTANT_CANDIDATES is true.  */
4990
4991 static void
4992 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4993                struct iv_use *use, bool important_candidates)
4994 {
4995   struct cost_pair *best_cp = NULL, *cp;
4996   bitmap_iterator bi;
4997   bitmap cands;
4998   unsigned i;
4999
5000   gcc_assert (ivs->upto >= use->id);
5001
5002   if (ivs->upto == use->id)
5003     {
5004       ivs->upto++;
5005       ivs->bad_uses++;
5006     }
5007
5008   cands = (important_candidates ? data->important_candidates : ivs->cands);
5009   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5010     {
5011       struct iv_cand *cand = iv_cand (data, i);
5012
5013       cp = get_use_iv_cost (data, use, cand);
5014
5015       if (cheaper_cost_pair (cp, best_cp))
5016         best_cp = cp;
5017     }
5018
5019   iv_ca_set_cp (data, ivs, use, best_cp);
5020 }
5021
5022 /* Get cost for assignment IVS.  */
5023
5024 static comp_cost
5025 iv_ca_cost (struct iv_ca *ivs)
5026 {
5027   /* This was a conditional expression but it triggered a bug in
5028      Sun C 5.5.  */
5029   if (ivs->bad_uses)
5030     return infinite_cost;
5031   else
5032     return ivs->cost;
5033 }
5034
5035 /* Returns true if all dependences of CP are among invariants in IVS.  */
5036
5037 static bool
5038 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5039 {
5040   unsigned i;
5041   bitmap_iterator bi;
5042
5043   if (!cp->depends_on)
5044     return true;
5045
5046   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5047     {
5048       if (ivs->n_invariant_uses[i] == 0)
5049         return false;
5050     }
5051
5052   return true;
5053 }
5054
5055 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5056    it before NEXT_CHANGE.  */
5057
5058 static struct iv_ca_delta *
5059 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5060                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5061 {
5062   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5063
5064   change->use = use;
5065   change->old_cp = old_cp;
5066   change->new_cp = new_cp;
5067   change->next_change = next_change;
5068
5069   return change;
5070 }
5071
5072 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5073    are rewritten.  */
5074
5075 static struct iv_ca_delta *
5076 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5077 {
5078   struct iv_ca_delta *last;
5079
5080   if (!l2)
5081     return l1;
5082
5083   if (!l1)
5084     return l2;
5085
5086   for (last = l1; last->next_change; last = last->next_change)
5087     continue;
5088   last->next_change = l2;
5089
5090   return l1;
5091 }
5092
5093 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5094
5095 static struct iv_ca_delta *
5096 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5097 {
5098   struct iv_ca_delta *act, *next, *prev = NULL;
5099   struct cost_pair *tmp;
5100
5101   for (act = delta; act; act = next)
5102     {
5103       next = act->next_change;
5104       act->next_change = prev;
5105       prev = act;
5106
5107       tmp = act->old_cp;
5108       act->old_cp = act->new_cp;
5109       act->new_cp = tmp;
5110     }
5111
5112   return prev;
5113 }
5114
5115 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5116    reverted instead.  */
5117
5118 static void
5119 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5120                     struct iv_ca_delta *delta, bool forward)
5121 {
5122   struct cost_pair *from, *to;
5123   struct iv_ca_delta *act;
5124
5125   if (!forward)
5126     delta = iv_ca_delta_reverse (delta);
5127
5128   for (act = delta; act; act = act->next_change)
5129     {
5130       from = act->old_cp;
5131       to = act->new_cp;
5132       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5133       iv_ca_set_cp (data, ivs, act->use, to);
5134     }
5135
5136   if (!forward)
5137     iv_ca_delta_reverse (delta);
5138 }
5139
5140 /* Returns true if CAND is used in IVS.  */
5141
5142 static bool
5143 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5144 {
5145   return ivs->n_cand_uses[cand->id] > 0;
5146 }
5147
5148 /* Returns number of induction variable candidates in the set IVS.  */
5149
5150 static unsigned
5151 iv_ca_n_cands (struct iv_ca *ivs)
5152 {
5153   return ivs->n_cands;
5154 }
5155
5156 /* Free the list of changes DELTA.  */
5157
5158 static void
5159 iv_ca_delta_free (struct iv_ca_delta **delta)
5160 {
5161   struct iv_ca_delta *act, *next;
5162
5163   for (act = *delta; act; act = next)
5164     {
5165       next = act->next_change;
5166       free (act);
5167     }
5168
5169   *delta = NULL;
5170 }
5171
5172 /* Allocates new iv candidates assignment.  */
5173
5174 static struct iv_ca *
5175 iv_ca_new (struct ivopts_data *data)
5176 {
5177   struct iv_ca *nw = XNEW (struct iv_ca);
5178
5179   nw->upto = 0;
5180   nw->bad_uses = 0;
5181   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5182   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5183   nw->cands = BITMAP_ALLOC (NULL);
5184   nw->n_cands = 0;
5185   nw->n_regs = 0;
5186   nw->cand_use_cost = zero_cost;
5187   nw->cand_cost = 0;
5188   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5189   nw->cost = zero_cost;
5190   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5191   nw->num_used_inv_expr = 0;
5192
5193   return nw;
5194 }
5195
5196 /* Free memory occupied by the set IVS.  */
5197
5198 static void
5199 iv_ca_free (struct iv_ca **ivs)
5200 {
5201   free ((*ivs)->cand_for_use);
5202   free ((*ivs)->n_cand_uses);
5203   BITMAP_FREE ((*ivs)->cands);
5204   free ((*ivs)->n_invariant_uses);
5205   free ((*ivs)->used_inv_expr);
5206   free (*ivs);
5207   *ivs = NULL;
5208 }
5209
5210 /* Dumps IVS to FILE.  */
5211
5212 static void
5213 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5214 {
5215   const char *pref = "  invariants ";
5216   unsigned i;
5217   comp_cost cost = iv_ca_cost (ivs);
5218
5219   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5220   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5221            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5222   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5223
5224    for (i = 0; i < ivs->upto; i++)
5225     {
5226       struct iv_use *use = iv_use (data, i);
5227       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5228       if (cp)
5229         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5230                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5231       else
5232         fprintf (file, "   use:%d --> ??\n", use->id);
5233     }
5234
5235   for (i = 1; i <= data->max_inv_id; i++)
5236     if (ivs->n_invariant_uses[i])
5237       {
5238         fprintf (file, "%s%d", pref, i);
5239         pref = ", ";
5240       }
5241   fprintf (file, "\n\n");
5242 }
5243
5244 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5245    new set, and store differences in DELTA.  Number of induction variables
5246    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5247    the function will try to find a solution with mimimal iv candidates.  */
5248
5249 static comp_cost
5250 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5251               struct iv_cand *cand, struct iv_ca_delta **delta,
5252               unsigned *n_ivs, bool min_ncand)
5253 {
5254   unsigned i;
5255   comp_cost cost;
5256   struct iv_use *use;
5257   struct cost_pair *old_cp, *new_cp;
5258
5259   *delta = NULL;
5260   for (i = 0; i < ivs->upto; i++)
5261     {
5262       use = iv_use (data, i);
5263       old_cp = iv_ca_cand_for_use (ivs, use);
5264
5265       if (old_cp
5266           && old_cp->cand == cand)
5267         continue;
5268
5269       new_cp = get_use_iv_cost (data, use, cand);
5270       if (!new_cp)
5271         continue;
5272
5273       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5274         continue;
5275
5276       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5277         continue;
5278
5279       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5280     }
5281
5282   iv_ca_delta_commit (data, ivs, *delta, true);
5283   cost = iv_ca_cost (ivs);
5284   if (n_ivs)
5285     *n_ivs = iv_ca_n_cands (ivs);
5286   iv_ca_delta_commit (data, ivs, *delta, false);
5287
5288   return cost;
5289 }
5290
5291 /* Try narrowing set IVS by removing CAND.  Return the cost of
5292    the new set and store the differences in DELTA.  */
5293
5294 static comp_cost
5295 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5296               struct iv_cand *cand, struct iv_ca_delta **delta)
5297 {
5298   unsigned i, ci;
5299   struct iv_use *use;
5300   struct cost_pair *old_cp, *new_cp, *cp;
5301   bitmap_iterator bi;
5302   struct iv_cand *cnd;
5303   comp_cost cost;
5304
5305   *delta = NULL;
5306   for (i = 0; i < n_iv_uses (data); i++)
5307     {
5308       use = iv_use (data, i);
5309
5310       old_cp = iv_ca_cand_for_use (ivs, use);
5311       if (old_cp->cand != cand)
5312         continue;
5313
5314       new_cp = NULL;
5315
5316       if (data->consider_all_candidates)
5317         {
5318           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5319             {
5320               if (ci == cand->id)
5321                 continue;
5322
5323               cnd = iv_cand (data, ci);
5324
5325               cp = get_use_iv_cost (data, use, cnd);
5326               if (!cp)
5327                 continue;
5328
5329               if (!iv_ca_has_deps (ivs, cp))
5330                 continue;
5331
5332               if (!cheaper_cost_pair (cp, new_cp))
5333                 continue;
5334
5335               new_cp = cp;
5336             }
5337         }
5338       else
5339         {
5340           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5341             {
5342               if (ci == cand->id)
5343                 continue;
5344
5345               cnd = iv_cand (data, ci);
5346
5347               cp = get_use_iv_cost (data, use, cnd);
5348               if (!cp)
5349                 continue;
5350               if (!iv_ca_has_deps (ivs, cp))
5351                 continue;
5352
5353               if (!cheaper_cost_pair (cp, new_cp))
5354                 continue;
5355
5356               new_cp = cp;
5357             }
5358         }
5359
5360       if (!new_cp)
5361         {
5362           iv_ca_delta_free (delta);
5363           return infinite_cost;
5364         }
5365
5366       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5367     }
5368
5369   iv_ca_delta_commit (data, ivs, *delta, true);
5370   cost = iv_ca_cost (ivs);
5371   iv_ca_delta_commit (data, ivs, *delta, false);
5372
5373   return cost;
5374 }
5375
5376 /* Try optimizing the set of candidates IVS by removing candidates different
5377    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5378    differences in DELTA.  */
5379
5380 static comp_cost
5381 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5382              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5383 {
5384   bitmap_iterator bi;
5385   struct iv_ca_delta *act_delta, *best_delta;
5386   unsigned i;
5387   comp_cost best_cost, acost;
5388   struct iv_cand *cand;
5389
5390   best_delta = NULL;
5391   best_cost = iv_ca_cost (ivs);
5392
5393   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5394     {
5395       cand = iv_cand (data, i);
5396
5397       if (cand == except_cand)
5398         continue;
5399
5400       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5401
5402       if (compare_costs (acost, best_cost) < 0)
5403         {
5404           best_cost = acost;
5405           iv_ca_delta_free (&best_delta);
5406           best_delta = act_delta;
5407         }
5408       else
5409         iv_ca_delta_free (&act_delta);
5410     }
5411
5412   if (!best_delta)
5413     {
5414       *delta = NULL;
5415       return best_cost;
5416     }
5417
5418   /* Recurse to possibly remove other unnecessary ivs.  */
5419   iv_ca_delta_commit (data, ivs, best_delta, true);
5420   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5421   iv_ca_delta_commit (data, ivs, best_delta, false);
5422   *delta = iv_ca_delta_join (best_delta, *delta);
5423   return best_cost;
5424 }
5425
5426 /* Tries to extend the sets IVS in the best possible way in order
5427    to express the USE.  If ORIGINALP is true, prefer candidates from
5428    the original set of IVs, otherwise favor important candidates not
5429    based on any memory object.  */
5430
5431 static bool
5432 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5433                   struct iv_use *use, bool originalp)
5434 {
5435   comp_cost best_cost, act_cost;
5436   unsigned i;
5437   bitmap_iterator bi;
5438   struct iv_cand *cand;
5439   struct iv_ca_delta *best_delta = NULL, *act_delta;
5440   struct cost_pair *cp;
5441
5442   iv_ca_add_use (data, ivs, use, false);
5443   best_cost = iv_ca_cost (ivs);
5444
5445   cp = iv_ca_cand_for_use (ivs, use);
5446   if (!cp)
5447     {
5448       ivs->upto--;
5449       ivs->bad_uses--;
5450       iv_ca_add_use (data, ivs, use, true);
5451       best_cost = iv_ca_cost (ivs);
5452       cp = iv_ca_cand_for_use (ivs, use);
5453     }
5454   if (cp)
5455     {
5456       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5457       iv_ca_set_no_cp (data, ivs, use);
5458     }
5459
5460   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5461      first try important candidates not based on any memory object.  Only if
5462      this fails, try the specific ones.  Rationale -- in loops with many
5463      variables the best choice often is to use just one generic biv.  If we
5464      added here many ivs specific to the uses, the optimization algorithm later
5465      would be likely to get stuck in a local minimum, thus causing us to create
5466      too many ivs.  The approach from few ivs to more seems more likely to be
5467      successful -- starting from few ivs, replacing an expensive use by a
5468      specific iv should always be a win.  */
5469   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5470     {
5471       cand = iv_cand (data, i);
5472
5473       if (originalp && cand->pos !=IP_ORIGINAL)
5474         continue;
5475
5476       if (!originalp && cand->iv->base_object != NULL_TREE)
5477         continue;
5478
5479       if (iv_ca_cand_used_p (ivs, cand))
5480         continue;
5481
5482       cp = get_use_iv_cost (data, use, cand);
5483       if (!cp)
5484         continue;
5485
5486       iv_ca_set_cp (data, ivs, use, cp);
5487       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5488                                true);
5489       iv_ca_set_no_cp (data, ivs, use);
5490       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5491
5492       if (compare_costs (act_cost, best_cost) < 0)
5493         {
5494           best_cost = act_cost;
5495
5496           iv_ca_delta_free (&best_delta);
5497           best_delta = act_delta;
5498         }
5499       else
5500         iv_ca_delta_free (&act_delta);
5501     }
5502
5503   if (infinite_cost_p (best_cost))
5504     {
5505       for (i = 0; i < use->n_map_members; i++)
5506         {
5507           cp = use->cost_map + i;
5508           cand = cp->cand;
5509           if (!cand)
5510             continue;
5511
5512           /* Already tried this.  */
5513           if (cand->important)
5514             {
5515               if (originalp && cand->pos == IP_ORIGINAL)
5516                 continue;
5517               if (!originalp && cand->iv->base_object == NULL_TREE)
5518                 continue;
5519             }
5520
5521           if (iv_ca_cand_used_p (ivs, cand))
5522             continue;
5523
5524           act_delta = NULL;
5525           iv_ca_set_cp (data, ivs, use, cp);
5526           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5527           iv_ca_set_no_cp (data, ivs, use);
5528           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5529                                        cp, act_delta);
5530
5531           if (compare_costs (act_cost, best_cost) < 0)
5532             {
5533               best_cost = act_cost;
5534
5535               if (best_delta)
5536                 iv_ca_delta_free (&best_delta);
5537               best_delta = act_delta;
5538             }
5539           else
5540             iv_ca_delta_free (&act_delta);
5541         }
5542     }
5543
5544   iv_ca_delta_commit (data, ivs, best_delta, true);
5545   iv_ca_delta_free (&best_delta);
5546
5547   return !infinite_cost_p (best_cost);
5548 }
5549
5550 /* Finds an initial assignment of candidates to uses.  */
5551
5552 static struct iv_ca *
5553 get_initial_solution (struct ivopts_data *data, bool originalp)
5554 {
5555   struct iv_ca *ivs = iv_ca_new (data);
5556   unsigned i;
5557
5558   for (i = 0; i < n_iv_uses (data); i++)
5559     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5560       {
5561         iv_ca_free (&ivs);
5562         return NULL;
5563       }
5564
5565   return ivs;
5566 }
5567
5568 /* Tries to improve set of induction variables IVS.  */
5569
5570 static bool
5571 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5572 {
5573   unsigned i, n_ivs;
5574   comp_cost acost, best_cost = iv_ca_cost (ivs);
5575   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5576   struct iv_cand *cand;
5577
5578   /* Try extending the set of induction variables by one.  */
5579   for (i = 0; i < n_iv_cands (data); i++)
5580     {
5581       cand = iv_cand (data, i);
5582
5583       if (iv_ca_cand_used_p (ivs, cand))
5584         continue;
5585
5586       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5587       if (!act_delta)
5588         continue;
5589
5590       /* If we successfully added the candidate and the set is small enough,
5591          try optimizing it by removing other candidates.  */
5592       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5593         {
5594           iv_ca_delta_commit (data, ivs, act_delta, true);
5595           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5596           iv_ca_delta_commit (data, ivs, act_delta, false);
5597           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5598         }
5599
5600       if (compare_costs (acost, best_cost) < 0)
5601         {
5602           best_cost = acost;
5603           iv_ca_delta_free (&best_delta);
5604           best_delta = act_delta;
5605         }
5606       else
5607         iv_ca_delta_free (&act_delta);
5608     }
5609
5610   if (!best_delta)
5611     {
5612       /* Try removing the candidates from the set instead.  */
5613       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5614
5615       /* Nothing more we can do.  */
5616       if (!best_delta)
5617         return false;
5618     }
5619
5620   iv_ca_delta_commit (data, ivs, best_delta, true);
5621   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5622   iv_ca_delta_free (&best_delta);
5623   return true;
5624 }
5625
5626 /* Attempts to find the optimal set of induction variables.  We do simple
5627    greedy heuristic -- we try to replace at most one candidate in the selected
5628    solution and remove the unused ivs while this improves the cost.  */
5629
5630 static struct iv_ca *
5631 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5632 {
5633   struct iv_ca *set;
5634
5635   /* Get the initial solution.  */
5636   set = get_initial_solution (data, originalp);
5637   if (!set)
5638     {
5639       if (dump_file && (dump_flags & TDF_DETAILS))
5640         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5641       return NULL;
5642     }
5643
5644   if (dump_file && (dump_flags & TDF_DETAILS))
5645     {
5646       fprintf (dump_file, "Initial set of candidates:\n");
5647       iv_ca_dump (data, dump_file, set);
5648     }
5649
5650   while (try_improve_iv_set (data, set))
5651     {
5652       if (dump_file && (dump_flags & TDF_DETAILS))
5653         {
5654           fprintf (dump_file, "Improved to:\n");
5655           iv_ca_dump (data, dump_file, set);
5656         }
5657     }
5658
5659   return set;
5660 }
5661
5662 static struct iv_ca *
5663 find_optimal_iv_set (struct ivopts_data *data)
5664 {
5665   unsigned i;
5666   struct iv_ca *set, *origset;
5667   struct iv_use *use;
5668   comp_cost cost, origcost;
5669
5670   /* Determine the cost based on a strategy that starts with original IVs,
5671      and try again using a strategy that prefers candidates not based
5672      on any IVs.  */
5673   origset = find_optimal_iv_set_1 (data, true);
5674   set = find_optimal_iv_set_1 (data, false);
5675
5676   if (!origset && !set)
5677     return NULL;
5678
5679   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5680   cost = set ? iv_ca_cost (set) : infinite_cost;
5681
5682   if (dump_file && (dump_flags & TDF_DETAILS))
5683     {
5684       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5685                origcost.cost, origcost.complexity);
5686       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5687                cost.cost, cost.complexity);
5688     }
5689
5690   /* Choose the one with the best cost.  */
5691   if (compare_costs (origcost, cost) <= 0)
5692     {
5693       if (set)
5694         iv_ca_free (&set);
5695       set = origset;
5696     }
5697   else if (origset)
5698     iv_ca_free (&origset);
5699
5700   for (i = 0; i < n_iv_uses (data); i++)
5701     {
5702       use = iv_use (data, i);
5703       use->selected = iv_ca_cand_for_use (set, use)->cand;
5704     }
5705
5706   return set;
5707 }
5708
5709 /* Creates a new induction variable corresponding to CAND.  */
5710
5711 static void
5712 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5713 {
5714   gimple_stmt_iterator incr_pos;
5715   tree base;
5716   bool after = false;
5717
5718   if (!cand->iv)
5719     return;
5720
5721   switch (cand->pos)
5722     {
5723     case IP_NORMAL:
5724       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5725       break;
5726
5727     case IP_END:
5728       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5729       after = true;
5730       break;
5731
5732     case IP_AFTER_USE:
5733       after = true;
5734       /* fall through */
5735     case IP_BEFORE_USE:
5736       incr_pos = gsi_for_stmt (cand->incremented_at);
5737       break;
5738
5739     case IP_ORIGINAL:
5740       /* Mark that the iv is preserved.  */
5741       name_info (data, cand->var_before)->preserve_biv = true;
5742       name_info (data, cand->var_after)->preserve_biv = true;
5743
5744       /* Rewrite the increment so that it uses var_before directly.  */
5745       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5746       return;
5747     }
5748
5749   gimple_add_tmp_var (cand->var_before);
5750   add_referenced_var (cand->var_before);
5751
5752   base = unshare_expr (cand->iv->base);
5753
5754   create_iv (base, unshare_expr (cand->iv->step),
5755              cand->var_before, data->current_loop,
5756              &incr_pos, after, &cand->var_before, &cand->var_after);
5757 }
5758
5759 /* Creates new induction variables described in SET.  */
5760
5761 static void
5762 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5763 {
5764   unsigned i;
5765   struct iv_cand *cand;
5766   bitmap_iterator bi;
5767
5768   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5769     {
5770       cand = iv_cand (data, i);
5771       create_new_iv (data, cand);
5772     }
5773
5774   if (dump_file && (dump_flags & TDF_DETAILS))
5775     {
5776       fprintf (dump_file, "\nSelected IV set: \n");
5777       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5778         {
5779           cand = iv_cand (data, i);
5780           dump_cand (dump_file, cand);
5781         }
5782       fprintf (dump_file, "\n");
5783     }
5784 }
5785
5786 /* Rewrites USE (definition of iv used in a nonlinear expression)
5787    using candidate CAND.  */
5788
5789 static void
5790 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5791                             struct iv_use *use, struct iv_cand *cand)
5792 {
5793   tree comp;
5794   tree op, tgt;
5795   gimple ass;
5796   gimple_stmt_iterator bsi;
5797
5798   /* An important special case -- if we are asked to express value of
5799      the original iv by itself, just exit; there is no need to
5800      introduce a new computation (that might also need casting the
5801      variable to unsigned and back).  */
5802   if (cand->pos == IP_ORIGINAL
5803       && cand->incremented_at == use->stmt)
5804     {
5805       tree step, ctype, utype;
5806       enum tree_code incr_code = PLUS_EXPR, old_code;
5807
5808       gcc_assert (is_gimple_assign (use->stmt));
5809       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5810
5811       step = cand->iv->step;
5812       ctype = TREE_TYPE (step);
5813       utype = TREE_TYPE (cand->var_after);
5814       if (TREE_CODE (step) == NEGATE_EXPR)
5815         {
5816           incr_code = MINUS_EXPR;
5817           step = TREE_OPERAND (step, 0);
5818         }
5819
5820       /* Check whether we may leave the computation unchanged.
5821          This is the case only if it does not rely on other
5822          computations in the loop -- otherwise, the computation
5823          we rely upon may be removed in remove_unused_ivs,
5824          thus leading to ICE.  */
5825       old_code = gimple_assign_rhs_code (use->stmt);
5826       if (old_code == PLUS_EXPR
5827           || old_code == MINUS_EXPR
5828           || old_code == POINTER_PLUS_EXPR)
5829         {
5830           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5831             op = gimple_assign_rhs2 (use->stmt);
5832           else if (old_code != MINUS_EXPR
5833                    && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5834             op = gimple_assign_rhs1 (use->stmt);
5835           else
5836             op = NULL_TREE;
5837         }
5838       else
5839         op = NULL_TREE;
5840
5841       if (op
5842           && (TREE_CODE (op) == INTEGER_CST
5843               || operand_equal_p (op, step, 0)))
5844         return;
5845
5846       /* Otherwise, add the necessary computations to express
5847          the iv.  */
5848       op = fold_convert (ctype, cand->var_before);
5849       comp = fold_convert (utype,
5850                            build2 (incr_code, ctype, op,
5851                                    unshare_expr (step)));
5852     }
5853   else
5854     {
5855       comp = get_computation (data->current_loop, use, cand);
5856       gcc_assert (comp != NULL_TREE);
5857     }
5858
5859   switch (gimple_code (use->stmt))
5860     {
5861     case GIMPLE_PHI:
5862       tgt = PHI_RESULT (use->stmt);
5863
5864       /* If we should keep the biv, do not replace it.  */
5865       if (name_info (data, tgt)->preserve_biv)
5866         return;
5867
5868       bsi = gsi_after_labels (gimple_bb (use->stmt));
5869       break;
5870
5871     case GIMPLE_ASSIGN:
5872       tgt = gimple_assign_lhs (use->stmt);
5873       bsi = gsi_for_stmt (use->stmt);
5874       break;
5875
5876     default:
5877       gcc_unreachable ();
5878     }
5879
5880   if (!valid_gimple_rhs_p (comp)
5881       || (gimple_code (use->stmt) != GIMPLE_PHI
5882           /* We can't allow re-allocating the stmt as it might be pointed
5883              to still.  */
5884           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5885               >= gimple_num_ops (gsi_stmt (bsi)))))
5886     {
5887       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5888                                        true, GSI_SAME_STMT);
5889       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5890         {
5891           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5892           /* As this isn't a plain copy we have to reset alignment
5893              information.  */
5894           if (SSA_NAME_PTR_INFO (comp))
5895             {
5896               SSA_NAME_PTR_INFO (comp)->align = 1;
5897               SSA_NAME_PTR_INFO (comp)->misalign = 0;
5898             }
5899         }
5900     }
5901
5902   if (gimple_code (use->stmt) == GIMPLE_PHI)
5903     {
5904       ass = gimple_build_assign (tgt, comp);
5905       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5906
5907       bsi = gsi_for_stmt (use->stmt);
5908       remove_phi_node (&bsi, false);
5909     }
5910   else
5911     {
5912       gimple_assign_set_rhs_from_tree (&bsi, comp);
5913       use->stmt = gsi_stmt (bsi);
5914     }
5915 }
5916
5917 /* Copies the reference information from OLD_REF to NEW_REF.  */
5918
5919 static void
5920 copy_ref_info (tree new_ref, tree old_ref)
5921 {
5922   tree new_ptr_base = NULL_TREE;
5923
5924   TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5925   TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5926
5927   new_ptr_base = TREE_OPERAND (new_ref, 0);
5928
5929   /* We can transfer points-to information from an old pointer
5930      or decl base to the new one.  */
5931   if (new_ptr_base
5932       && TREE_CODE (new_ptr_base) == SSA_NAME
5933       && !SSA_NAME_PTR_INFO (new_ptr_base))
5934     {
5935       tree base = get_base_address (old_ref);
5936       if (!base)
5937         ;
5938       else if ((TREE_CODE (base) == MEM_REF
5939                 || TREE_CODE (base) == TARGET_MEM_REF)
5940                && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
5941                && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
5942         {
5943           struct ptr_info_def *new_pi;
5944           duplicate_ssa_name_ptr_info
5945             (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5946           new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
5947           /* We have to be careful about transfering alignment information.  */
5948           if (TREE_CODE (old_ref) == MEM_REF
5949               && !(TREE_CODE (new_ref) == TARGET_MEM_REF
5950                    && (TMR_INDEX2 (new_ref)
5951                        || (TMR_STEP (new_ref)
5952                            && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
5953                                < new_pi->align)))))
5954             {
5955               new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
5956                                                   mem_ref_offset (new_ref)).low;
5957               new_pi->misalign &= (new_pi->align - 1);
5958             }
5959           else
5960             {
5961               new_pi->align = 1;
5962               new_pi->misalign = 0;
5963             }
5964         }
5965       else if (TREE_CODE (base) == VAR_DECL
5966                || TREE_CODE (base) == PARM_DECL
5967                || TREE_CODE (base) == RESULT_DECL)
5968         {
5969           struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5970           pt_solution_set_var (&pi->pt, base);
5971         }
5972     }
5973 }
5974
5975 /* Performs a peephole optimization to reorder the iv update statement with
5976    a mem ref to enable instruction combining in later phases. The mem ref uses
5977    the iv value before the update, so the reordering transformation requires
5978    adjustment of the offset. CAND is the selected IV_CAND.
5979
5980    Example:
5981
5982    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
5983    iv2 = iv1 + 1;
5984
5985    if (t < val)      (1)
5986      goto L;
5987    goto Head;
5988
5989
5990    directly propagating t over to (1) will introduce overlapping live range
5991    thus increase register pressure. This peephole transform it into:
5992
5993
5994    iv2 = iv1 + 1;
5995    t = MEM_REF (base, iv2, 8, 8);
5996    if (t < val)
5997      goto L;
5998    goto Head;
5999 */
6000
6001 static void
6002 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6003 {
6004   tree var_after;
6005   gimple iv_update, stmt;
6006   basic_block bb;
6007   gimple_stmt_iterator gsi, gsi_iv;
6008
6009   if (cand->pos != IP_NORMAL)
6010     return;
6011
6012   var_after = cand->var_after;
6013   iv_update = SSA_NAME_DEF_STMT (var_after);
6014
6015   bb = gimple_bb (iv_update);
6016   gsi = gsi_last_nondebug_bb (bb);
6017   stmt = gsi_stmt (gsi);
6018
6019   /* Only handle conditional statement for now.  */
6020   if (gimple_code (stmt) != GIMPLE_COND)
6021     return;
6022
6023   gsi_prev_nondebug (&gsi);
6024   stmt = gsi_stmt (gsi);
6025   if (stmt != iv_update)
6026     return;
6027
6028   gsi_prev_nondebug (&gsi);
6029   if (gsi_end_p (gsi))
6030     return;
6031
6032   stmt = gsi_stmt (gsi);
6033   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6034     return;
6035
6036   if (stmt != use->stmt)
6037     return;
6038
6039   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6040     return;
6041
6042   if (dump_file && (dump_flags & TDF_DETAILS))
6043     {
6044       fprintf (dump_file, "Reordering \n");
6045       print_gimple_stmt (dump_file, iv_update, 0, 0);
6046       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6047       fprintf (dump_file, "\n");
6048     }
6049
6050   gsi = gsi_for_stmt (use->stmt);
6051   gsi_iv = gsi_for_stmt (iv_update);
6052   gsi_move_before (&gsi_iv, &gsi);
6053
6054   cand->pos = IP_BEFORE_USE;
6055   cand->incremented_at = use->stmt;
6056 }
6057
6058 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6059
6060 static void
6061 rewrite_use_address (struct ivopts_data *data,
6062                      struct iv_use *use, struct iv_cand *cand)
6063 {
6064   aff_tree aff;
6065   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6066   tree base_hint = NULL_TREE;
6067   tree ref, iv;
6068   bool ok;
6069
6070   adjust_iv_update_pos (cand, use);
6071   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6072   gcc_assert (ok);
6073   unshare_aff_combination (&aff);
6074
6075   /* To avoid undefined overflow problems, all IV candidates use unsigned
6076      integer types.  The drawback is that this makes it impossible for
6077      create_mem_ref to distinguish an IV that is based on a memory object
6078      from one that represents simply an offset.
6079
6080      To work around this problem, we pass a hint to create_mem_ref that
6081      indicates which variable (if any) in aff is an IV based on a memory
6082      object.  Note that we only consider the candidate.  If this is not
6083      based on an object, the base of the reference is in some subexpression
6084      of the use -- but these will use pointer types, so they are recognized
6085      by the create_mem_ref heuristics anyway.  */
6086   if (cand->iv->base_object)
6087     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6088
6089   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6090   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6091                         reference_alias_ptr_type (*use->op_p),
6092                         iv, base_hint, data->speed);
6093   copy_ref_info (ref, *use->op_p);
6094   *use->op_p = ref;
6095 }
6096
6097 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6098    candidate CAND.  */
6099
6100 static void
6101 rewrite_use_compare (struct ivopts_data *data,
6102                      struct iv_use *use, struct iv_cand *cand)
6103 {
6104   tree comp, *var_p, op, bound;
6105   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6106   enum tree_code compare;
6107   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6108   bool ok;
6109
6110   bound = cp->value;
6111   if (bound)
6112     {
6113       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6114       tree var_type = TREE_TYPE (var);
6115       gimple_seq stmts;
6116
6117       if (dump_file && (dump_flags & TDF_DETAILS))
6118         {
6119           fprintf (dump_file, "Replacing exit test: ");
6120           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6121         }
6122       compare = iv_elimination_compare (data, use);
6123       bound = unshare_expr (fold_convert (var_type, bound));
6124       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6125       if (stmts)
6126         gsi_insert_seq_on_edge_immediate (
6127                 loop_preheader_edge (data->current_loop),
6128                 stmts);
6129
6130       gimple_cond_set_lhs (use->stmt, var);
6131       gimple_cond_set_code (use->stmt, compare);
6132       gimple_cond_set_rhs (use->stmt, op);
6133       return;
6134     }
6135
6136   /* The induction variable elimination failed; just express the original
6137      giv.  */
6138   comp = get_computation (data->current_loop, use, cand);
6139   gcc_assert (comp != NULL_TREE);
6140
6141   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6142   gcc_assert (ok);
6143
6144   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6145                                      true, GSI_SAME_STMT);
6146 }
6147
6148 /* Rewrites USE using candidate CAND.  */
6149
6150 static void
6151 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6152 {
6153   switch (use->type)
6154     {
6155       case USE_NONLINEAR_EXPR:
6156         rewrite_use_nonlinear_expr (data, use, cand);
6157         break;
6158
6159       case USE_ADDRESS:
6160         rewrite_use_address (data, use, cand);
6161         break;
6162
6163       case USE_COMPARE:
6164         rewrite_use_compare (data, use, cand);
6165         break;
6166
6167       default:
6168         gcc_unreachable ();
6169     }
6170
6171   update_stmt (use->stmt);
6172 }
6173
6174 /* Rewrite the uses using the selected induction variables.  */
6175
6176 static void
6177 rewrite_uses (struct ivopts_data *data)
6178 {
6179   unsigned i;
6180   struct iv_cand *cand;
6181   struct iv_use *use;
6182
6183   for (i = 0; i < n_iv_uses (data); i++)
6184     {
6185       use = iv_use (data, i);
6186       cand = use->selected;
6187       gcc_assert (cand);
6188
6189       rewrite_use (data, use, cand);
6190     }
6191 }
6192
6193 /* Removes the ivs that are not used after rewriting.  */
6194
6195 static void
6196 remove_unused_ivs (struct ivopts_data *data)
6197 {
6198   unsigned j;
6199   bitmap_iterator bi;
6200   bitmap toremove = BITMAP_ALLOC (NULL);
6201
6202   /* Figure out an order in which to release SSA DEFs so that we don't
6203      release something that we'd have to propagate into a debug stmt
6204      afterwards.  */
6205   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6206     {
6207       struct version_info *info;
6208
6209       info = ver_info (data, j);
6210       if (info->iv
6211           && !integer_zerop (info->iv->step)
6212           && !info->inv_id
6213           && !info->iv->have_use_for
6214           && !info->preserve_biv)
6215         bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6216     }
6217
6218   release_defs_bitset (toremove);
6219
6220   BITMAP_FREE (toremove);
6221 }
6222
6223 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6224    for pointer_map_traverse.  */
6225
6226 static bool
6227 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6228                       void *data ATTRIBUTE_UNUSED)
6229 {
6230   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6231
6232   free (niter);
6233   return true;
6234 }
6235
6236 /* Frees data allocated by the optimization of a single loop.  */
6237
6238 static void
6239 free_loop_data (struct ivopts_data *data)
6240 {
6241   unsigned i, j;
6242   bitmap_iterator bi;
6243   tree obj;
6244
6245   if (data->niters)
6246     {
6247       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6248       pointer_map_destroy (data->niters);
6249       data->niters = NULL;
6250     }
6251
6252   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6253     {
6254       struct version_info *info;
6255
6256       info = ver_info (data, i);
6257       if (info->iv)
6258         free (info->iv);
6259       info->iv = NULL;
6260       info->has_nonlin_use = false;
6261       info->preserve_biv = false;
6262       info->inv_id = 0;
6263     }
6264   bitmap_clear (data->relevant);
6265   bitmap_clear (data->important_candidates);
6266
6267   for (i = 0; i < n_iv_uses (data); i++)
6268     {
6269       struct iv_use *use = iv_use (data, i);
6270
6271       free (use->iv);
6272       BITMAP_FREE (use->related_cands);
6273       for (j = 0; j < use->n_map_members; j++)
6274         if (use->cost_map[j].depends_on)
6275           BITMAP_FREE (use->cost_map[j].depends_on);
6276       free (use->cost_map);
6277       free (use);
6278     }
6279   VEC_truncate (iv_use_p, data->iv_uses, 0);
6280
6281   for (i = 0; i < n_iv_cands (data); i++)
6282     {
6283       struct iv_cand *cand = iv_cand (data, i);
6284
6285       if (cand->iv)
6286         free (cand->iv);
6287       if (cand->depends_on)
6288         BITMAP_FREE (cand->depends_on);
6289       free (cand);
6290     }
6291   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6292
6293   if (data->version_info_size < num_ssa_names)
6294     {
6295       data->version_info_size = 2 * num_ssa_names;
6296       free (data->version_info);
6297       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6298     }
6299
6300   data->max_inv_id = 0;
6301
6302   FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6303     SET_DECL_RTL (obj, NULL_RTX);
6304
6305   VEC_truncate (tree, decl_rtl_to_reset, 0);
6306
6307   htab_empty (data->inv_expr_tab);
6308   data->inv_expr_id = 0;
6309 }
6310
6311 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6312    loop tree.  */
6313
6314 static void
6315 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6316 {
6317   free_loop_data (data);
6318   free (data->version_info);
6319   BITMAP_FREE (data->relevant);
6320   BITMAP_FREE (data->important_candidates);
6321
6322   VEC_free (tree, heap, decl_rtl_to_reset);
6323   VEC_free (iv_use_p, heap, data->iv_uses);
6324   VEC_free (iv_cand_p, heap, data->iv_candidates);
6325   htab_delete (data->inv_expr_tab);
6326 }
6327
6328 /* Returns true if the loop body BODY includes any function calls.  */
6329
6330 static bool
6331 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6332 {
6333   gimple_stmt_iterator gsi;
6334   unsigned i;
6335
6336   for (i = 0; i < num_nodes; i++)
6337     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6338       {
6339         gimple stmt = gsi_stmt (gsi);
6340         if (is_gimple_call (stmt)
6341             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6342           return true;
6343       }
6344   return false;
6345 }
6346
6347 /* Optimizes the LOOP.  Returns true if anything changed.  */
6348
6349 static bool
6350 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6351 {
6352   bool changed = false;
6353   struct iv_ca *iv_ca;
6354   edge exit;
6355   basic_block *body;
6356
6357   gcc_assert (!data->niters);
6358   data->current_loop = loop;
6359   data->speed = optimize_loop_for_speed_p (loop);
6360
6361   if (dump_file && (dump_flags & TDF_DETAILS))
6362     {
6363       fprintf (dump_file, "Processing loop %d\n", loop->num);
6364
6365       exit = single_dom_exit (loop);
6366       if (exit)
6367         {
6368           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6369                    exit->src->index, exit->dest->index);
6370           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6371           fprintf (dump_file, "\n");
6372         }
6373
6374       fprintf (dump_file, "\n");
6375     }
6376
6377   body = get_loop_body (loop);
6378   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6379   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6380   free (body);
6381
6382   /* For each ssa name determines whether it behaves as an induction variable
6383      in some loop.  */
6384   if (!find_induction_variables (data))
6385     goto finish;
6386
6387   /* Finds interesting uses (item 1).  */
6388   find_interesting_uses (data);
6389   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6390     goto finish;
6391
6392   /* Finds candidates for the induction variables (item 2).  */
6393   find_iv_candidates (data);
6394
6395   /* Calculates the costs (item 3, part 1).  */
6396   determine_iv_costs (data);
6397   determine_use_iv_costs (data);
6398   determine_set_costs (data);
6399
6400   /* Find the optimal set of induction variables (item 3, part 2).  */
6401   iv_ca = find_optimal_iv_set (data);
6402   if (!iv_ca)
6403     goto finish;
6404   changed = true;
6405
6406   /* Create the new induction variables (item 4, part 1).  */
6407   create_new_ivs (data, iv_ca);
6408   iv_ca_free (&iv_ca);
6409
6410   /* Rewrite the uses (item 4, part 2).  */
6411   rewrite_uses (data);
6412
6413   /* Remove the ivs that are unused after rewriting.  */
6414   remove_unused_ivs (data);
6415
6416   /* We have changed the structure of induction variables; it might happen
6417      that definitions in the scev database refer to some of them that were
6418      eliminated.  */
6419   scev_reset ();
6420
6421 finish:
6422   free_loop_data (data);
6423
6424   return changed;
6425 }
6426
6427 /* Main entry point.  Optimizes induction variables in loops.  */
6428
6429 void
6430 tree_ssa_iv_optimize (void)
6431 {
6432   struct loop *loop;
6433   struct ivopts_data data;
6434   loop_iterator li;
6435
6436   tree_ssa_iv_optimize_init (&data);
6437
6438   /* Optimize the loops starting with the innermost ones.  */
6439   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6440     {
6441       if (dump_file && (dump_flags & TDF_DETAILS))
6442         flow_loop_dump (loop, dump_file, NULL, 1);
6443
6444       tree_ssa_iv_optimize_loop (&data, loop);
6445     }
6446
6447   tree_ssa_iv_optimize_finalize (&data);
6448 }