gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "tm_p.h"
  71 #include "basic-block.h"
  72 #include "output.h"
  73 #include "tree-pretty-print.h"
  74 #include "gimple-pretty-print.h"
  75 #include "tree-flow.h"
  76 #include "tree-dump.h"
  77 #include "timevar.h"
  78 #include "cfgloop.h"
  79 #include "tree-pass.h"
  80 #include "ggc.h"
  81 #include "insn-config.h"
  82 #include "recog.h"
  83 #include "pointer-set.h"
  84 #include "hashtab.h"
  85 #include "tree-chrec.h"
  86 #include "tree-scalar-evolution.h"
  87 #include "cfgloop.h"
  88 #include "params.h"
  89 #include "langhooks.h"
  90 #include "tree-affine.h"
  91 #include "target.h"
  92 #include "tree-inline.h"
  93 #include "tree-ssa-propagate.h"
  94
  95 /* FIXME: add_cost and zero_cost defined in exprmed.h conflict with local uses.
  96  */
  97 #include "expmed.h"
  98 #undef add_cost
  99 #undef zero_cost
 100
 101 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 102    cost of different addressing modes.  This should be moved to a TBD
 103    interface between the GIMPLE and RTL worlds.  */
 104 #include "expr.h"
 105
 106 /* The infinite cost.  */
 107 #define INFTY 10000000
 108
 109 #define AVG_LOOP_NITER(LOOP) 5
 110
 111 /* Returns the expected number of loop iterations for LOOP.
 112    The average trip count is computed from profile data if it
 113    exists. */
 114
 115 static inline HOST_WIDE_INT
 116 avg_loop_niter (struct loop *loop)
 117 {
 118   HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
 119   if (niter == -1)
 120     return AVG_LOOP_NITER (loop);
 121
 122   return niter;
 123 }
 124
 125 /* Representation of the induction variable.  */
 126 struct iv
 127 {
 128   tree base;            /* Initial value of the iv.  */
 129   tree base_object;     /* A memory object to that the induction variable points.  */
 130   tree step;            /* Step of the iv (constant only).  */
 131   tree ssa_name;        /* The ssa name with the value.  */
 132   bool biv_p;           /* Is it a biv?  */
 133   bool have_use_for;    /* Do we already have a use for it?  */
 134   unsigned use_id;      /* The identifier in the use if it is the case.  */
 135 };
 136
 137 /* Per-ssa version information (induction variable descriptions, etc.).  */
 138 struct version_info
 139 {
 140   tree name;            /* The ssa name.  */
 141   struct iv *iv;        /* Induction variable description.  */
 142   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 143                            an expression that is not an induction variable.  */
 144   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 145   unsigned inv_id;      /* Id of an invariant.  */
 146 };
 147
 148 /* Types of uses.  */
 149 enum use_type
 150 {
 151   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 152   USE_ADDRESS,          /* Use in an address.  */
 153   USE_COMPARE           /* Use is a compare.  */
 154 };
 155
 156 /* Cost of a computation.  */
 157 typedef struct
 158 {
 159   int cost;             /* The runtime cost.  */
 160   unsigned complexity;  /* The estimate of the complexity of the code for
 161                            the computation (in no concrete units --
 162                            complexity field should be larger for more
 163                            complex expressions and addressing modes).  */
 164 } comp_cost;
 165
 166 static const comp_cost zero_cost = {0, 0};
 167 static const comp_cost infinite_cost = {INFTY, INFTY};
 168
 169 /* The candidate - cost pair.  */
 170 struct cost_pair
 171 {
 172   struct iv_cand *cand; /* The candidate.  */
 173   comp_cost cost;       /* The cost.  */
 174   bitmap depends_on;    /* The list of invariants that have to be
 175                            preserved.  */
 176   tree value;           /* For final value elimination, the expression for
 177                            the final value of the iv.  For iv elimination,
 178                            the new bound to compare with.  */
 179   int inv_expr_id;      /* Loop invariant expression id.  */
 180 };
 181
 182 /* Use.  */
 183 struct iv_use
 184 {
 185   unsigned id;          /* The id of the use.  */
 186   enum use_type type;   /* Type of the use.  */
 187   struct iv *iv;        /* The induction variable it is based on.  */
 188   gimple stmt;          /* Statement in that it occurs.  */
 189   tree *op_p;           /* The place where it occurs.  */
 190   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 191                            important ones.  */
 192
 193   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 194   struct cost_pair *cost_map;
 195                         /* The costs wrto the iv candidates.  */
 196
 197   struct iv_cand *selected;
 198                         /* The selected candidate.  */
 199 };
 200
 201 /* The position where the iv is computed.  */
 202 enum iv_position
 203 {
 204   IP_NORMAL,            /* At the end, just before the exit condition.  */
 205   IP_END,               /* At the end of the latch block.  */
 206   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 207   IP_AFTER_USE,         /* Immediately after a specific use.  */
 208   IP_ORIGINAL           /* The original biv.  */
 209 };
 210
 211 /* The induction variable candidate.  */
 212 struct iv_cand
 213 {
 214   unsigned id;          /* The number of the candidate.  */
 215   bool important;       /* Whether this is an "important" candidate, i.e. such
 216                            that it should be considered by all uses.  */
 217   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 218   gimple incremented_at;/* For original biv, the statement where it is
 219                            incremented.  */
 220   tree var_before;      /* The variable used for it before increment.  */
 221   tree var_after;       /* The variable used for it after increment.  */
 222   struct iv *iv;        /* The value of the candidate.  NULL for
 223                            "pseudocandidate" used to indicate the possibility
 224                            to replace the final value of an iv by direct
 225                            computation of the value.  */
 226   unsigned cost;        /* Cost of the candidate.  */
 227   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 228   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 229                               where it is incremented.  */
 230   bitmap depends_on;    /* The list of invariants that are used in step of the
 231                            biv.  */
 232 };
 233
 234 /* Loop invariant expression hashtable entry.  */
 235 struct iv_inv_expr_ent
 236 {
 237   tree expr;
 238   int id;
 239   hashval_t hash;
 240 };
 241
 242 /* The data used by the induction variable optimizations.  */
 243
 244 typedef struct iv_use *iv_use_p;
 245 DEF_VEC_P(iv_use_p);
 246 DEF_VEC_ALLOC_P(iv_use_p,heap);
 247
 248 typedef struct iv_cand *iv_cand_p;
 249 DEF_VEC_P(iv_cand_p);
 250 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 251
 252 struct ivopts_data
 253 {
 254   /* The currently optimized loop.  */
 255   struct loop *current_loop;
 256
 257   /* Numbers of iterations for all exits of the current loop.  */
 258   struct pointer_map_t *niters;
 259
 260   /* Number of registers used in it.  */
 261   unsigned regs_used;
 262
 263   /* The size of version_info array allocated.  */
 264   unsigned version_info_size;
 265
 266   /* The array of information for the ssa names.  */
 267   struct version_info *version_info;
 268
 269   /* The hashtable of loop invariant expressions created
 270      by ivopt.  */
 271   htab_t inv_expr_tab;
 272
 273   /* Loop invariant expression id.  */
 274   int inv_expr_id;
 275
 276   /* The bitmap of indices in version_info whose value was changed.  */
 277   bitmap relevant;
 278
 279   /* The uses of induction variables.  */
 280   VEC(iv_use_p,heap) *iv_uses;
 281
 282   /* The candidates.  */
 283   VEC(iv_cand_p,heap) *iv_candidates;
 284
 285   /* A bitmap of important candidates.  */
 286   bitmap important_candidates;
 287
 288   /* The maximum invariant id.  */
 289   unsigned max_inv_id;
 290
 291   /* Whether to consider just related and important candidates when replacing a
 292      use.  */
 293   bool consider_all_candidates;
 294
 295   /* Are we optimizing for speed?  */
 296   bool speed;
 297
 298   /* Whether the loop body includes any function calls.  */
 299   bool body_includes_call;
 300 };
 301
 302 /* An assignment of iv candidates to uses.  */
 303
 304 struct iv_ca
 305 {
 306   /* The number of uses covered by the assignment.  */
 307   unsigned upto;
 308
 309   /* Number of uses that cannot be expressed by the candidates in the set.  */
 310   unsigned bad_uses;
 311
 312   /* Candidate assigned to a use, together with the related costs.  */
 313   struct cost_pair **cand_for_use;
 314
 315   /* Number of times each candidate is used.  */
 316   unsigned *n_cand_uses;
 317
 318   /* The candidates used.  */
 319   bitmap cands;
 320
 321   /* The number of candidates in the set.  */
 322   unsigned n_cands;
 323
 324   /* Total number of registers needed.  */
 325   unsigned n_regs;
 326
 327   /* Total cost of expressing uses.  */
 328   comp_cost cand_use_cost;
 329
 330   /* Total cost of candidates.  */
 331   unsigned cand_cost;
 332
 333   /* Number of times each invariant is used.  */
 334   unsigned *n_invariant_uses;
 335
 336   /* The array holding the number of uses of each loop
 337      invariant expressions created by ivopt.  */
 338   unsigned *used_inv_expr;
 339
 340   /* The number of created loop invariants.  */
 341   unsigned num_used_inv_expr;
 342
 343   /* Total cost of the assignment.  */
 344   comp_cost cost;
 345 };
 346
 347 /* Difference of two iv candidate assignments.  */
 348
 349 struct iv_ca_delta
 350 {
 351   /* Changed use.  */
 352   struct iv_use *use;
 353
 354   /* An old assignment (for rollback purposes).  */
 355   struct cost_pair *old_cp;
 356
 357   /* A new assignment.  */
 358   struct cost_pair *new_cp;
 359
 360   /* Next change in the list.  */
 361   struct iv_ca_delta *next_change;
 362 };
 363
 364 /* Bound on number of candidates below that all candidates are considered.  */
 365
 366 #define CONSIDER_ALL_CANDIDATES_BOUND \
 367   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 368
 369 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 370    optimizing such a loop would help, and it would take ages).  */
 371
 372 #define MAX_CONSIDERED_USES \
 373   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 374
 375 /* If there are at most this number of ivs in the set, try removing unnecessary
 376    ivs from the set always.  */
 377
 378 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 379   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 380
 381 /* The list of trees for that the decl_rtl field must be reset is stored
 382    here.  */
 383
 384 static VEC(tree,heap) *decl_rtl_to_reset;
 385
 386 static comp_cost force_expr_to_var_cost (tree, bool);
 387
 388 /* Number of uses recorded in DATA.  */
 389
 390 static inline unsigned
 391 n_iv_uses (struct ivopts_data *data)
 392 {
 393   return VEC_length (iv_use_p, data->iv_uses);
 394 }
 395
 396 /* Ith use recorded in DATA.  */
 397
 398 static inline struct iv_use *
 399 iv_use (struct ivopts_data *data, unsigned i)
 400 {
 401   return VEC_index (iv_use_p, data->iv_uses, i);
 402 }
 403
 404 /* Number of candidates recorded in DATA.  */
 405
 406 static inline unsigned
 407 n_iv_cands (struct ivopts_data *data)
 408 {
 409   return VEC_length (iv_cand_p, data->iv_candidates);
 410 }
 411
 412 /* Ith candidate recorded in DATA.  */
 413
 414 static inline struct iv_cand *
 415 iv_cand (struct ivopts_data *data, unsigned i)
 416 {
 417   return VEC_index (iv_cand_p, data->iv_candidates, i);
 418 }
 419
 420 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 421
 422 edge
 423 single_dom_exit (struct loop *loop)
 424 {
 425   edge exit = single_exit (loop);
 426
 427   if (!exit)
 428     return NULL;
 429
 430   if (!just_once_each_iteration_p (loop, exit->src))
 431     return NULL;
 432
 433   return exit;
 434 }
 435
 436 /* Dumps information about the induction variable IV to FILE.  */
 437
 438 extern void dump_iv (FILE *, struct iv *);
 439 void
 440 dump_iv (FILE *file, struct iv *iv)
 441 {
 442   if (iv->ssa_name)
 443     {
 444       fprintf (file, "ssa name ");
 445       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 446       fprintf (file, "\n");
 447     }
 448
 449   fprintf (file, "  type ");
 450   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 451   fprintf (file, "\n");
 452
 453   if (iv->step)
 454     {
 455       fprintf (file, "  base ");
 456       print_generic_expr (file, iv->base, TDF_SLIM);
 457       fprintf (file, "\n");
 458
 459       fprintf (file, "  step ");
 460       print_generic_expr (file, iv->step, TDF_SLIM);
 461       fprintf (file, "\n");
 462     }
 463   else
 464     {
 465       fprintf (file, "  invariant ");
 466       print_generic_expr (file, iv->base, TDF_SLIM);
 467       fprintf (file, "\n");
 468     }
 469
 470   if (iv->base_object)
 471     {
 472       fprintf (file, "  base object ");
 473       print_generic_expr (file, iv->base_object, TDF_SLIM);
 474       fprintf (file, "\n");
 475     }
 476
 477   if (iv->biv_p)
 478     fprintf (file, "  is a biv\n");
 479 }
 480
 481 /* Dumps information about the USE to FILE.  */
 482
 483 extern void dump_use (FILE *, struct iv_use *);
 484 void
 485 dump_use (FILE *file, struct iv_use *use)
 486 {
 487   fprintf (file, "use %d\n", use->id);
 488
 489   switch (use->type)
 490     {
 491     case USE_NONLINEAR_EXPR:
 492       fprintf (file, "  generic\n");
 493       break;
 494
 495     case USE_ADDRESS:
 496       fprintf (file, "  address\n");
 497       break;
 498
 499     case USE_COMPARE:
 500       fprintf (file, "  compare\n");
 501       break;
 502
 503     default:
 504       gcc_unreachable ();
 505     }
 506
 507   fprintf (file, "  in statement ");
 508   print_gimple_stmt (file, use->stmt, 0, 0);
 509   fprintf (file, "\n");
 510
 511   fprintf (file, "  at position ");
 512   if (use->op_p)
 513     print_generic_expr (file, *use->op_p, TDF_SLIM);
 514   fprintf (file, "\n");
 515
 516   dump_iv (file, use->iv);
 517
 518   if (use->related_cands)
 519     {
 520       fprintf (file, "  related candidates ");
 521       dump_bitmap (file, use->related_cands);
 522     }
 523 }
 524
 525 /* Dumps information about the uses to FILE.  */
 526
 527 extern void dump_uses (FILE *, struct ivopts_data *);
 528 void
 529 dump_uses (FILE *file, struct ivopts_data *data)
 530 {
 531   unsigned i;
 532   struct iv_use *use;
 533
 534   for (i = 0; i < n_iv_uses (data); i++)
 535     {
 536       use = iv_use (data, i);
 537
 538       dump_use (file, use);
 539       fprintf (file, "\n");
 540     }
 541 }
 542
 543 /* Dumps information about induction variable candidate CAND to FILE.  */
 544
 545 extern void dump_cand (FILE *, struct iv_cand *);
 546 void
 547 dump_cand (FILE *file, struct iv_cand *cand)
 548 {
 549   struct iv *iv = cand->iv;
 550
 551   fprintf (file, "candidate %d%s\n",
 552            cand->id, cand->important ? " (important)" : "");
 553
 554   if (cand->depends_on)
 555     {
 556       fprintf (file, "  depends on ");
 557       dump_bitmap (file, cand->depends_on);
 558     }
 559
 560   if (!iv)
 561     {
 562       fprintf (file, "  final value replacement\n");
 563       return;
 564     }
 565
 566   if (cand->var_before)
 567     {
 568       fprintf (file, "  var_before ");
 569       print_generic_expr (file, cand->var_before, TDF_SLIM);
 570       fprintf (file, "\n");
 571     }
 572   if (cand->var_after)
 573     {
 574       fprintf (file, "  var_after ");
 575       print_generic_expr (file, cand->var_after, TDF_SLIM);
 576       fprintf (file, "\n");
 577     }
 578
 579   switch (cand->pos)
 580     {
 581     case IP_NORMAL:
 582       fprintf (file, "  incremented before exit test\n");
 583       break;
 584
 585     case IP_BEFORE_USE:
 586       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 587       break;
 588
 589     case IP_AFTER_USE:
 590       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 591       break;
 592
 593     case IP_END:
 594       fprintf (file, "  incremented at end\n");
 595       break;
 596
 597     case IP_ORIGINAL:
 598       fprintf (file, "  original biv\n");
 599       break;
 600     }
 601
 602   dump_iv (file, iv);
 603 }
 604
 605 /* Returns the info for ssa version VER.  */
 606
 607 static inline struct version_info *
 608 ver_info (struct ivopts_data *data, unsigned ver)
 609 {
 610   return data->version_info + ver;
 611 }
 612
 613 /* Returns the info for ssa name NAME.  */
 614
 615 static inline struct version_info *
 616 name_info (struct ivopts_data *data, tree name)
 617 {
 618   return ver_info (data, SSA_NAME_VERSION (name));
 619 }
 620
 621 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 622    emitted in LOOP.  */
 623
 624 static bool
 625 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 626 {
 627   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 628
 629   gcc_assert (bb);
 630
 631   if (sbb == loop->latch)
 632     return true;
 633
 634   if (sbb != bb)
 635     return false;
 636
 637   return stmt == last_stmt (bb);
 638 }
 639
 640 /* Returns true if STMT if after the place where the original induction
 641    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 642    if the positions are identical.  */
 643
 644 static bool
 645 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 646 {
 647   basic_block cand_bb = gimple_bb (cand->incremented_at);
 648   basic_block stmt_bb = gimple_bb (stmt);
 649
 650   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 651     return false;
 652
 653   if (stmt_bb != cand_bb)
 654     return true;
 655
 656   if (true_if_equal
 657       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 658     return true;
 659   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 660 }
 661
 662 /* Returns true if STMT if after the place where the induction variable
 663    CAND is incremented in LOOP.  */
 664
 665 static bool
 666 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 667 {
 668   switch (cand->pos)
 669     {
 670     case IP_END:
 671       return false;
 672
 673     case IP_NORMAL:
 674       return stmt_after_ip_normal_pos (loop, stmt);
 675
 676     case IP_ORIGINAL:
 677     case IP_AFTER_USE:
 678       return stmt_after_inc_pos (cand, stmt, false);
 679
 680     case IP_BEFORE_USE:
 681       return stmt_after_inc_pos (cand, stmt, true);
 682
 683     default:
 684       gcc_unreachable ();
 685     }
 686 }
 687
 688 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 689
 690 static bool
 691 abnormal_ssa_name_p (tree exp)
 692 {
 693   if (!exp)
 694     return false;
 695
 696   if (TREE_CODE (exp) != SSA_NAME)
 697     return false;
 698
 699   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 700 }
 701
 702 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 703    abnormal phi node.  Callback for for_each_index.  */
 704
 705 static bool
 706 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 707                                   void *data ATTRIBUTE_UNUSED)
 708 {
 709   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 710     {
 711       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 712         return false;
 713       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 714         return false;
 715     }
 716
 717   return !abnormal_ssa_name_p (*index);
 718 }
 719
 720 /* Returns true if EXPR contains a ssa name that occurs in an
 721    abnormal phi node.  */
 722
 723 bool
 724 contains_abnormal_ssa_name_p (tree expr)
 725 {
 726   enum tree_code code;
 727   enum tree_code_class codeclass;
 728
 729   if (!expr)
 730     return false;
 731
 732   code = TREE_CODE (expr);
 733   codeclass = TREE_CODE_CLASS (code);
 734
 735   if (code == SSA_NAME)
 736     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 737
 738   if (code == INTEGER_CST
 739       || is_gimple_min_invariant (expr))
 740     return false;
 741
 742   if (code == ADDR_EXPR)
 743     return !for_each_index (&TREE_OPERAND (expr, 0),
 744                             idx_contains_abnormal_ssa_name_p,
 745                             NULL);
 746
 747   if (code == COND_EXPR)
 748     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 749       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 750       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 751
 752   switch (codeclass)
 753     {
 754     case tcc_binary:
 755     case tcc_comparison:
 756       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 757         return true;
 758
 759       /* Fallthru.  */
 760     case tcc_unary:
 761       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 762         return true;
 763
 764       break;
 765
 766     default:
 767       gcc_unreachable ();
 768     }
 769
 770   return false;
 771 }
 772
 773 /*  Returns tree describing number of iterations determined from
 774     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 775
 776 static tree
 777 niter_for_exit (struct ivopts_data *data, edge exit,
 778                 struct tree_niter_desc **desc_p)
 779 {
 780   struct tree_niter_desc* desc = NULL;
 781   tree niter;
 782   void **slot;
 783
 784   if (!data->niters)
 785     {
 786       data->niters = pointer_map_create ();
 787       slot = NULL;
 788     }
 789   else
 790     slot = pointer_map_contains (data->niters, exit);
 791
 792   if (!slot)
 793     {
 794       /* Try to determine number of iterations.  We must know it
 795          unconditionally (i.e., without possibility of # of iterations
 796          being zero).  Also, we cannot safely work with ssa names that
 797          appear in phi nodes on abnormal edges, so that we do not create
 798          overlapping life ranges for them (PR 27283).  */
 799       desc = XNEW (struct tree_niter_desc);
 800       if (number_of_iterations_exit (data->current_loop,
 801                                      exit, desc, true)
 802           && integer_zerop (desc->may_be_zero)
 803           && !contains_abnormal_ssa_name_p (desc->niter))
 804         niter = desc->niter;
 805       else
 806         niter = NULL_TREE;
 807
 808       desc->niter = niter;
 809       slot = pointer_map_insert (data->niters, exit);
 810       *slot = desc;
 811     }
 812   else
 813     niter = ((struct tree_niter_desc *) *slot)->niter;
 814
 815   if (desc_p)
 816     *desc_p = (struct tree_niter_desc *) *slot;
 817   return niter;
 818 }
 819
 820 /* Returns tree describing number of iterations determined from
 821    single dominating exit of DATA->current_loop, or NULL if something
 822    goes wrong.  */
 823
 824 static tree
 825 niter_for_single_dom_exit (struct ivopts_data *data)
 826 {
 827   edge exit = single_dom_exit (data->current_loop);
 828
 829   if (!exit)
 830     return NULL;
 831
 832   return niter_for_exit (data, exit, NULL);
 833 }
 834
 835 /* Hash table equality function for expressions.  */
 836
 837 static int
 838 htab_inv_expr_eq (const void *ent1, const void *ent2)
 839 {
 840   const struct iv_inv_expr_ent *expr1 =
 841       (const struct iv_inv_expr_ent *)ent1;
 842   const struct iv_inv_expr_ent *expr2 =
 843       (const struct iv_inv_expr_ent *)ent2;
 844
 845   return expr1->hash == expr2->hash
 846          && operand_equal_p (expr1->expr, expr2->expr, 0);
 847 }
 848
 849 /* Hash function for loop invariant expressions.  */
 850
 851 static hashval_t
 852 htab_inv_expr_hash (const void *ent)
 853 {
 854   const struct iv_inv_expr_ent *expr =
 855       (const struct iv_inv_expr_ent *)ent;
 856   return expr->hash;
 857 }
 858
 859 /* Initializes data structures used by the iv optimization pass, stored
 860    in DATA.  */
 861
 862 static void
 863 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 864 {
 865   data->version_info_size = 2 * num_ssa_names;
 866   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 867   data->relevant = BITMAP_ALLOC (NULL);
 868   data->important_candidates = BITMAP_ALLOC (NULL);
 869   data->max_inv_id = 0;
 870   data->niters = NULL;
 871   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 872   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 873   data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
 874                                     htab_inv_expr_eq, free);
 875   data->inv_expr_id = 0;
 876   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 877 }
 878
 879 /* Returns a memory object to that EXPR points.  In case we are able to
 880    determine that it does not point to any such object, NULL is returned.  */
 881
 882 static tree
 883 determine_base_object (tree expr)
 884 {
 885   enum tree_code code = TREE_CODE (expr);
 886   tree base, obj;
 887
 888   /* If this is a pointer casted to any type, we need to determine
 889      the base object for the pointer; so handle conversions before
 890      throwing away non-pointer expressions.  */
 891   if (CONVERT_EXPR_P (expr))
 892     return determine_base_object (TREE_OPERAND (expr, 0));
 893
 894   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 895     return NULL_TREE;
 896
 897   switch (code)
 898     {
 899     case INTEGER_CST:
 900       return NULL_TREE;
 901
 902     case ADDR_EXPR:
 903       obj = TREE_OPERAND (expr, 0);
 904       base = get_base_address (obj);
 905
 906       if (!base)
 907         return expr;
 908
 909       if (TREE_CODE (base) == MEM_REF)
 910         return determine_base_object (TREE_OPERAND (base, 0));
 911
 912       return fold_convert (ptr_type_node,
 913                            build_fold_addr_expr (base));
 914
 915     case POINTER_PLUS_EXPR:
 916       return determine_base_object (TREE_OPERAND (expr, 0));
 917
 918     case PLUS_EXPR:
 919     case MINUS_EXPR:
 920       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 921       gcc_unreachable ();
 922
 923     default:
 924       return fold_convert (ptr_type_node, expr);
 925     }
 926 }
 927
 928 /* Allocates an induction variable with given initial value BASE and step STEP
 929    for loop LOOP.  */
 930
 931 static struct iv *
 932 alloc_iv (tree base, tree step)
 933 {
 934   struct iv *iv = XCNEW (struct iv);
 935   gcc_assert (step != NULL_TREE);
 936
 937   iv->base = base;
 938   iv->base_object = determine_base_object (base);
 939   iv->step = step;
 940   iv->biv_p = false;
 941   iv->have_use_for = false;
 942   iv->use_id = 0;
 943   iv->ssa_name = NULL_TREE;
 944
 945   return iv;
 946 }
 947
 948 /* Sets STEP and BASE for induction variable IV.  */
 949
 950 static void
 951 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 952 {
 953   struct version_info *info = name_info (data, iv);
 954
 955   gcc_assert (!info->iv);
 956
 957   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 958   info->iv = alloc_iv (base, step);
 959   info->iv->ssa_name = iv;
 960 }
 961
 962 /* Finds induction variable declaration for VAR.  */
 963
 964 static struct iv *
 965 get_iv (struct ivopts_data *data, tree var)
 966 {
 967   basic_block bb;
 968   tree type = TREE_TYPE (var);
 969
 970   if (!POINTER_TYPE_P (type)
 971       && !INTEGRAL_TYPE_P (type))
 972     return NULL;
 973
 974   if (!name_info (data, var)->iv)
 975     {
 976       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 977
 978       if (!bb
 979           || !flow_bb_inside_loop_p (data->current_loop, bb))
 980         set_iv (data, var, var, build_int_cst (type, 0));
 981     }
 982
 983   return name_info (data, var)->iv;
 984 }
 985
 986 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 987    not define a simple affine biv with nonzero step.  */
 988
 989 static tree
 990 determine_biv_step (gimple phi)
 991 {
 992   struct loop *loop = gimple_bb (phi)->loop_father;
 993   tree name = PHI_RESULT (phi);
 994   affine_iv iv;
 995
 996   if (!is_gimple_reg (name))
 997     return NULL_TREE;
 998
 999   if (!simple_iv (loop, loop, name, &iv, true))
1000     return NULL_TREE;
1001
1002   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1003 }
1004
1005 /* Finds basic ivs.  */
1006
1007 static bool
1008 find_bivs (struct ivopts_data *data)
1009 {
1010   gimple phi;
1011   tree step, type, base;
1012   bool found = false;
1013   struct loop *loop = data->current_loop;
1014   gimple_stmt_iterator psi;
1015
1016   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1017     {
1018       phi = gsi_stmt (psi);
1019
1020       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1021         continue;
1022
1023       step = determine_biv_step (phi);
1024       if (!step)
1025         continue;
1026
1027       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1028       base = expand_simple_operations (base);
1029       if (contains_abnormal_ssa_name_p (base)
1030           || contains_abnormal_ssa_name_p (step))
1031         continue;
1032
1033       type = TREE_TYPE (PHI_RESULT (phi));
1034       base = fold_convert (type, base);
1035       if (step)
1036         {
1037           if (POINTER_TYPE_P (type))
1038             step = fold_convert (sizetype, step);
1039           else
1040             step = fold_convert (type, step);
1041         }
1042
1043       set_iv (data, PHI_RESULT (phi), base, step);
1044       found = true;
1045     }
1046
1047   return found;
1048 }
1049
1050 /* Marks basic ivs.  */
1051
1052 static void
1053 mark_bivs (struct ivopts_data *data)
1054 {
1055   gimple phi;
1056   tree var;
1057   struct iv *iv, *incr_iv;
1058   struct loop *loop = data->current_loop;
1059   basic_block incr_bb;
1060   gimple_stmt_iterator psi;
1061
1062   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1063     {
1064       phi = gsi_stmt (psi);
1065
1066       iv = get_iv (data, PHI_RESULT (phi));
1067       if (!iv)
1068         continue;
1069
1070       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1071       incr_iv = get_iv (data, var);
1072       if (!incr_iv)
1073         continue;
1074
1075       /* If the increment is in the subloop, ignore it.  */
1076       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1077       if (incr_bb->loop_father != data->current_loop
1078           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1079         continue;
1080
1081       iv->biv_p = true;
1082       incr_iv->biv_p = true;
1083     }
1084 }
1085
1086 /* Checks whether STMT defines a linear induction variable and stores its
1087    parameters to IV.  */
1088
1089 static bool
1090 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1091 {
1092   tree lhs;
1093   struct loop *loop = data->current_loop;
1094
1095   iv->base = NULL_TREE;
1096   iv->step = NULL_TREE;
1097
1098   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1099     return false;
1100
1101   lhs = gimple_assign_lhs (stmt);
1102   if (TREE_CODE (lhs) != SSA_NAME)
1103     return false;
1104
1105   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1106     return false;
1107   iv->base = expand_simple_operations (iv->base);
1108
1109   if (contains_abnormal_ssa_name_p (iv->base)
1110       || contains_abnormal_ssa_name_p (iv->step))
1111     return false;
1112
1113   /* If STMT could throw, then do not consider STMT as defining a GIV.
1114      While this will suppress optimizations, we can not safely delete this
1115      GIV and associated statements, even if it appears it is not used.  */
1116   if (stmt_could_throw_p (stmt))
1117     return false;
1118
1119   return true;
1120 }
1121
1122 /* Finds general ivs in statement STMT.  */
1123
1124 static void
1125 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1126 {
1127   affine_iv iv;
1128
1129   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1130     return;
1131
1132   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1133 }
1134
1135 /* Finds general ivs in basic block BB.  */
1136
1137 static void
1138 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1139 {
1140   gimple_stmt_iterator bsi;
1141
1142   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1143     find_givs_in_stmt (data, gsi_stmt (bsi));
1144 }
1145
1146 /* Finds general ivs.  */
1147
1148 static void
1149 find_givs (struct ivopts_data *data)
1150 {
1151   struct loop *loop = data->current_loop;
1152   basic_block *body = get_loop_body_in_dom_order (loop);
1153   unsigned i;
1154
1155   for (i = 0; i < loop->num_nodes; i++)
1156     find_givs_in_bb (data, body[i]);
1157   free (body);
1158 }
1159
1160 /* For each ssa name defined in LOOP determines whether it is an induction
1161    variable and if so, its initial value and step.  */
1162
1163 static bool
1164 find_induction_variables (struct ivopts_data *data)
1165 {
1166   unsigned i;
1167   bitmap_iterator bi;
1168
1169   if (!find_bivs (data))
1170     return false;
1171
1172   find_givs (data);
1173   mark_bivs (data);
1174
1175   if (dump_file && (dump_flags & TDF_DETAILS))
1176     {
1177       tree niter = niter_for_single_dom_exit (data);
1178
1179       if (niter)
1180         {
1181           fprintf (dump_file, "  number of iterations ");
1182           print_generic_expr (dump_file, niter, TDF_SLIM);
1183           fprintf (dump_file, "\n\n");
1184         };
1185
1186       fprintf (dump_file, "Induction variables:\n\n");
1187
1188       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1189         {
1190           if (ver_info (data, i)->iv)
1191             dump_iv (dump_file, ver_info (data, i)->iv);
1192         }
1193     }
1194
1195   return true;
1196 }
1197
1198 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1199
1200 static struct iv_use *
1201 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1202             gimple stmt, enum use_type use_type)
1203 {
1204   struct iv_use *use = XCNEW (struct iv_use);
1205
1206   use->id = n_iv_uses (data);
1207   use->type = use_type;
1208   use->iv = iv;
1209   use->stmt = stmt;
1210   use->op_p = use_p;
1211   use->related_cands = BITMAP_ALLOC (NULL);
1212
1213   /* To avoid showing ssa name in the dumps, if it was not reset by the
1214      caller.  */
1215   iv->ssa_name = NULL_TREE;
1216
1217   if (dump_file && (dump_flags & TDF_DETAILS))
1218     dump_use (dump_file, use);
1219
1220   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1221
1222   return use;
1223 }
1224
1225 /* Checks whether OP is a loop-level invariant and if so, records it.
1226    NONLINEAR_USE is true if the invariant is used in a way we do not
1227    handle specially.  */
1228
1229 static void
1230 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1231 {
1232   basic_block bb;
1233   struct version_info *info;
1234
1235   if (TREE_CODE (op) != SSA_NAME
1236       || !is_gimple_reg (op))
1237     return;
1238
1239   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1240   if (bb
1241       && flow_bb_inside_loop_p (data->current_loop, bb))
1242     return;
1243
1244   info = name_info (data, op);
1245   info->name = op;
1246   info->has_nonlin_use |= nonlinear_use;
1247   if (!info->inv_id)
1248     info->inv_id = ++data->max_inv_id;
1249   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1250 }
1251
1252 /* Checks whether the use OP is interesting and if so, records it.  */
1253
1254 static struct iv_use *
1255 find_interesting_uses_op (struct ivopts_data *data, tree op)
1256 {
1257   struct iv *iv;
1258   struct iv *civ;
1259   gimple stmt;
1260   struct iv_use *use;
1261
1262   if (TREE_CODE (op) != SSA_NAME)
1263     return NULL;
1264
1265   iv = get_iv (data, op);
1266   if (!iv)
1267     return NULL;
1268
1269   if (iv->have_use_for)
1270     {
1271       use = iv_use (data, iv->use_id);
1272
1273       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1274       return use;
1275     }
1276
1277   if (integer_zerop (iv->step))
1278     {
1279       record_invariant (data, op, true);
1280       return NULL;
1281     }
1282   iv->have_use_for = true;
1283
1284   civ = XNEW (struct iv);
1285   *civ = *iv;
1286
1287   stmt = SSA_NAME_DEF_STMT (op);
1288   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1289               || is_gimple_assign (stmt));
1290
1291   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1292   iv->use_id = use->id;
1293
1294   return use;
1295 }
1296
1297 /* Given a condition in statement STMT, checks whether it is a compare
1298    of an induction variable and an invariant.  If this is the case,
1299    CONTROL_VAR is set to location of the iv, BOUND to the location of
1300    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1301    induction variable descriptions, and true is returned.  If this is not
1302    the case, CONTROL_VAR and BOUND are set to the arguments of the
1303    condition and false is returned.  */
1304
1305 static bool
1306 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1307                        tree **control_var, tree **bound,
1308                        struct iv **iv_var, struct iv **iv_bound)
1309 {
1310   /* The objects returned when COND has constant operands.  */
1311   static struct iv const_iv;
1312   static tree zero;
1313   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1314   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1315   bool ret = false;
1316
1317   if (gimple_code (stmt) == GIMPLE_COND)
1318     {
1319       op0 = gimple_cond_lhs_ptr (stmt);
1320       op1 = gimple_cond_rhs_ptr (stmt);
1321     }
1322   else
1323     {
1324       op0 = gimple_assign_rhs1_ptr (stmt);
1325       op1 = gimple_assign_rhs2_ptr (stmt);
1326     }
1327
1328   zero = integer_zero_node;
1329   const_iv.step = integer_zero_node;
1330
1331   if (TREE_CODE (*op0) == SSA_NAME)
1332     iv0 = get_iv (data, *op0);
1333   if (TREE_CODE (*op1) == SSA_NAME)
1334     iv1 = get_iv (data, *op1);
1335
1336   /* Exactly one of the compared values must be an iv, and the other one must
1337      be an invariant.  */
1338   if (!iv0 || !iv1)
1339     goto end;
1340
1341   if (integer_zerop (iv0->step))
1342     {
1343       /* Control variable may be on the other side.  */
1344       tmp_op = op0; op0 = op1; op1 = tmp_op;
1345       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1346     }
1347   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1348
1349 end:
1350   if (control_var)
1351     *control_var = op0;;
1352   if (iv_var)
1353     *iv_var = iv0;;
1354   if (bound)
1355     *bound = op1;
1356   if (iv_bound)
1357     *iv_bound = iv1;
1358
1359   return ret;
1360 }
1361
1362 /* Checks whether the condition in STMT is interesting and if so,
1363    records it.  */
1364
1365 static void
1366 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1367 {
1368   tree *var_p, *bound_p;
1369   struct iv *var_iv, *civ;
1370
1371   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1372     {
1373       find_interesting_uses_op (data, *var_p);
1374       find_interesting_uses_op (data, *bound_p);
1375       return;
1376     }
1377
1378   civ = XNEW (struct iv);
1379   *civ = *var_iv;
1380   record_use (data, NULL, civ, stmt, USE_COMPARE);
1381 }
1382
1383 /* Returns true if expression EXPR is obviously invariant in LOOP,
1384    i.e. if all its operands are defined outside of the LOOP.  LOOP
1385    should not be the function body.  */
1386
1387 bool
1388 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1389 {
1390   basic_block def_bb;
1391   unsigned i, len;
1392
1393   gcc_assert (loop_depth (loop) > 0);
1394
1395   if (is_gimple_min_invariant (expr))
1396     return true;
1397
1398   if (TREE_CODE (expr) == SSA_NAME)
1399     {
1400       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1401       if (def_bb
1402           && flow_bb_inside_loop_p (loop, def_bb))
1403         return false;
1404
1405       return true;
1406     }
1407
1408   if (!EXPR_P (expr))
1409     return false;
1410
1411   len = TREE_OPERAND_LENGTH (expr);
1412   for (i = 0; i < len; i++)
1413     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1414       return false;
1415
1416   return true;
1417 }
1418
1419 /* Returns true if statement STMT is obviously invariant in LOOP,
1420    i.e. if all its operands on the RHS are defined outside of the LOOP.
1421    LOOP should not be the function body.  */
1422
1423 bool
1424 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1425 {
1426   unsigned i;
1427   tree lhs;
1428
1429   gcc_assert (loop_depth (loop) > 0);
1430
1431   lhs = gimple_get_lhs (stmt);
1432   for (i = 0; i < gimple_num_ops (stmt); i++)
1433     {
1434       tree op = gimple_op (stmt, i);
1435       if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1436         return false;
1437     }
1438
1439   return true;
1440 }
1441
1442 /* Cumulates the steps of indices into DATA and replaces their values with the
1443    initial ones.  Returns false when the value of the index cannot be determined.
1444    Callback for for_each_index.  */
1445
1446 struct ifs_ivopts_data
1447 {
1448   struct ivopts_data *ivopts_data;
1449   gimple stmt;
1450   tree step;
1451 };
1452
1453 static bool
1454 idx_find_step (tree base, tree *idx, void *data)
1455 {
1456   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1457   struct iv *iv;
1458   tree step, iv_base, iv_step, lbound, off;
1459   struct loop *loop = dta->ivopts_data->current_loop;
1460
1461   /* If base is a component ref, require that the offset of the reference
1462      be invariant.  */
1463   if (TREE_CODE (base) == COMPONENT_REF)
1464     {
1465       off = component_ref_field_offset (base);
1466       return expr_invariant_in_loop_p (loop, off);
1467     }
1468
1469   /* If base is array, first check whether we will be able to move the
1470      reference out of the loop (in order to take its address in strength
1471      reduction).  In order for this to work we need both lower bound
1472      and step to be loop invariants.  */
1473   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1474     {
1475       /* Moreover, for a range, the size needs to be invariant as well.  */
1476       if (TREE_CODE (base) == ARRAY_RANGE_REF
1477           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1478         return false;
1479
1480       step = array_ref_element_size (base);
1481       lbound = array_ref_low_bound (base);
1482
1483       if (!expr_invariant_in_loop_p (loop, step)
1484           || !expr_invariant_in_loop_p (loop, lbound))
1485         return false;
1486     }
1487
1488   if (TREE_CODE (*idx) != SSA_NAME)
1489     return true;
1490
1491   iv = get_iv (dta->ivopts_data, *idx);
1492   if (!iv)
1493     return false;
1494
1495   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1496           *&x[0], which is not folded and does not trigger the
1497           ARRAY_REF path below.  */
1498   *idx = iv->base;
1499
1500   if (integer_zerop (iv->step))
1501     return true;
1502
1503   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1504     {
1505       step = array_ref_element_size (base);
1506
1507       /* We only handle addresses whose step is an integer constant.  */
1508       if (TREE_CODE (step) != INTEGER_CST)
1509         return false;
1510     }
1511   else
1512     /* The step for pointer arithmetics already is 1 byte.  */
1513     step = size_one_node;
1514
1515   iv_base = iv->base;
1516   iv_step = iv->step;
1517   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1518                             sizetype, &iv_base, &iv_step, dta->stmt,
1519                             false))
1520     {
1521       /* The index might wrap.  */
1522       return false;
1523     }
1524
1525   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1526   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1527
1528   return true;
1529 }
1530
1531 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1532    object is passed to it in DATA.  */
1533
1534 static bool
1535 idx_record_use (tree base, tree *idx,
1536                 void *vdata)
1537 {
1538   struct ivopts_data *data = (struct ivopts_data *) vdata;
1539   find_interesting_uses_op (data, *idx);
1540   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1541     {
1542       find_interesting_uses_op (data, array_ref_element_size (base));
1543       find_interesting_uses_op (data, array_ref_low_bound (base));
1544     }
1545   return true;
1546 }
1547
1548 /* If we can prove that TOP = cst * BOT for some constant cst,
1549    store cst to MUL and return true.  Otherwise return false.
1550    The returned value is always sign-extended, regardless of the
1551    signedness of TOP and BOT.  */
1552
1553 static bool
1554 constant_multiple_of (tree top, tree bot, double_int *mul)
1555 {
1556   tree mby;
1557   enum tree_code code;
1558   double_int res, p0, p1;
1559   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1560
1561   STRIP_NOPS (top);
1562   STRIP_NOPS (bot);
1563
1564   if (operand_equal_p (top, bot, 0))
1565     {
1566       *mul = double_int_one;
1567       return true;
1568     }
1569
1570   code = TREE_CODE (top);
1571   switch (code)
1572     {
1573     case MULT_EXPR:
1574       mby = TREE_OPERAND (top, 1);
1575       if (TREE_CODE (mby) != INTEGER_CST)
1576         return false;
1577
1578       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1579         return false;
1580
1581       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1582                               precision);
1583       return true;
1584
1585     case PLUS_EXPR:
1586     case MINUS_EXPR:
1587       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1588           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1589         return false;
1590
1591       if (code == MINUS_EXPR)
1592         p1 = double_int_neg (p1);
1593       *mul = double_int_sext (double_int_add (p0, p1), precision);
1594       return true;
1595
1596     case INTEGER_CST:
1597       if (TREE_CODE (bot) != INTEGER_CST)
1598         return false;
1599
1600       p0 = double_int_sext (tree_to_double_int (top), precision);
1601       p1 = double_int_sext (tree_to_double_int (bot), precision);
1602       if (double_int_zero_p (p1))
1603         return false;
1604       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1605                               precision);
1606       return double_int_zero_p (res);
1607
1608     default:
1609       return false;
1610     }
1611 }
1612
1613 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1614
1615 static bool
1616 may_be_unaligned_p (tree ref, tree step)
1617 {
1618   tree base;
1619   tree base_type;
1620   HOST_WIDE_INT bitsize;
1621   HOST_WIDE_INT bitpos;
1622   tree toffset;
1623   enum machine_mode mode;
1624   int unsignedp, volatilep;
1625   unsigned base_align;
1626
1627   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1628      thus they are not misaligned.  */
1629   if (TREE_CODE (ref) == TARGET_MEM_REF)
1630     return false;
1631
1632   /* The test below is basically copy of what expr.c:normal_inner_ref
1633      does to check whether the object must be loaded by parts when
1634      STRICT_ALIGNMENT is true.  */
1635   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1636                               &unsignedp, &volatilep, true);
1637   base_type = TREE_TYPE (base);
1638   base_align = TYPE_ALIGN (base_type);
1639
1640   if (mode != BLKmode)
1641     {
1642       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1643
1644       if (base_align < mode_align
1645           || (bitpos % mode_align) != 0
1646           || (bitpos % BITS_PER_UNIT) != 0)
1647         return true;
1648
1649       if (toffset
1650           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1651         return true;
1652
1653       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1654         return true;
1655     }
1656
1657   return false;
1658 }
1659
1660 /* Return true if EXPR may be non-addressable.   */
1661
1662 bool
1663 may_be_nonaddressable_p (tree expr)
1664 {
1665   switch (TREE_CODE (expr))
1666     {
1667     case TARGET_MEM_REF:
1668       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1669          target, thus they are always addressable.  */
1670       return false;
1671
1672     case COMPONENT_REF:
1673       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1674              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1675
1676     case VIEW_CONVERT_EXPR:
1677       /* This kind of view-conversions may wrap non-addressable objects
1678          and make them look addressable.  After some processing the
1679          non-addressability may be uncovered again, causing ADDR_EXPRs
1680          of inappropriate objects to be built.  */
1681       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1682           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1683         return true;
1684
1685       /* ... fall through ... */
1686
1687     case ARRAY_REF:
1688     case ARRAY_RANGE_REF:
1689       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1690
1691     CASE_CONVERT:
1692       return true;
1693
1694     default:
1695       break;
1696     }
1697
1698   return false;
1699 }
1700
1701 /* Finds addresses in *OP_P inside STMT.  */
1702
1703 static void
1704 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1705 {
1706   tree base = *op_p, step = size_zero_node;
1707   struct iv *civ;
1708   struct ifs_ivopts_data ifs_ivopts_data;
1709
1710   /* Do not play with volatile memory references.  A bit too conservative,
1711      perhaps, but safe.  */
1712   if (gimple_has_volatile_ops (stmt))
1713     goto fail;
1714
1715   /* Ignore bitfields for now.  Not really something terribly complicated
1716      to handle.  TODO.  */
1717   if (TREE_CODE (base) == BIT_FIELD_REF)
1718     goto fail;
1719
1720   base = unshare_expr (base);
1721
1722   if (TREE_CODE (base) == TARGET_MEM_REF)
1723     {
1724       tree type = build_pointer_type (TREE_TYPE (base));
1725       tree astep;
1726
1727       if (TMR_BASE (base)
1728           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1729         {
1730           civ = get_iv (data, TMR_BASE (base));
1731           if (!civ)
1732             goto fail;
1733
1734           TMR_BASE (base) = civ->base;
1735           step = civ->step;
1736         }
1737       if (TMR_INDEX2 (base)
1738           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1739         {
1740           civ = get_iv (data, TMR_INDEX2 (base));
1741           if (!civ)
1742             goto fail;
1743
1744           TMR_INDEX2 (base) = civ->base;
1745           step = civ->step;
1746         }
1747       if (TMR_INDEX (base)
1748           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1749         {
1750           civ = get_iv (data, TMR_INDEX (base));
1751           if (!civ)
1752             goto fail;
1753
1754           TMR_INDEX (base) = civ->base;
1755           astep = civ->step;
1756
1757           if (astep)
1758             {
1759               if (TMR_STEP (base))
1760                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1761
1762               step = fold_build2 (PLUS_EXPR, type, step, astep);
1763             }
1764         }
1765
1766       if (integer_zerop (step))
1767         goto fail;
1768       base = tree_mem_ref_addr (type, base);
1769     }
1770   else
1771     {
1772       ifs_ivopts_data.ivopts_data = data;
1773       ifs_ivopts_data.stmt = stmt;
1774       ifs_ivopts_data.step = size_zero_node;
1775       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1776           || integer_zerop (ifs_ivopts_data.step))
1777         goto fail;
1778       step = ifs_ivopts_data.step;
1779
1780       /* Check that the base expression is addressable.  This needs
1781          to be done after substituting bases of IVs into it.  */
1782       if (may_be_nonaddressable_p (base))
1783         goto fail;
1784
1785       /* Moreover, on strict alignment platforms, check that it is
1786          sufficiently aligned.  */
1787       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1788         goto fail;
1789
1790       base = build_fold_addr_expr (base);
1791
1792       /* Substituting bases of IVs into the base expression might
1793          have caused folding opportunities.  */
1794       if (TREE_CODE (base) == ADDR_EXPR)
1795         {
1796           tree *ref = &TREE_OPERAND (base, 0);
1797           while (handled_component_p (*ref))
1798             ref = &TREE_OPERAND (*ref, 0);
1799           if (TREE_CODE (*ref) == MEM_REF)
1800             {
1801               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1802                                       TREE_OPERAND (*ref, 0),
1803                                       TREE_OPERAND (*ref, 1));
1804               if (tem)
1805                 *ref = tem;
1806             }
1807         }
1808     }
1809
1810   civ = alloc_iv (base, step);
1811   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1812   return;
1813
1814 fail:
1815   for_each_index (op_p, idx_record_use, data);
1816 }
1817
1818 /* Finds and records invariants used in STMT.  */
1819
1820 static void
1821 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1822 {
1823   ssa_op_iter iter;
1824   use_operand_p use_p;
1825   tree op;
1826
1827   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1828     {
1829       op = USE_FROM_PTR (use_p);
1830       record_invariant (data, op, false);
1831     }
1832 }
1833
1834 /* Finds interesting uses of induction variables in the statement STMT.  */
1835
1836 static void
1837 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1838 {
1839   struct iv *iv;
1840   tree op, *lhs, *rhs;
1841   ssa_op_iter iter;
1842   use_operand_p use_p;
1843   enum tree_code code;
1844
1845   find_invariants_stmt (data, stmt);
1846
1847   if (gimple_code (stmt) == GIMPLE_COND)
1848     {
1849       find_interesting_uses_cond (data, stmt);
1850       return;
1851     }
1852
1853   if (is_gimple_assign (stmt))
1854     {
1855       lhs = gimple_assign_lhs_ptr (stmt);
1856       rhs = gimple_assign_rhs1_ptr (stmt);
1857
1858       if (TREE_CODE (*lhs) == SSA_NAME)
1859         {
1860           /* If the statement defines an induction variable, the uses are not
1861              interesting by themselves.  */
1862
1863           iv = get_iv (data, *lhs);
1864
1865           if (iv && !integer_zerop (iv->step))
1866             return;
1867         }
1868
1869       code = gimple_assign_rhs_code (stmt);
1870       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1871           && (REFERENCE_CLASS_P (*rhs)
1872               || is_gimple_val (*rhs)))
1873         {
1874           if (REFERENCE_CLASS_P (*rhs))
1875             find_interesting_uses_address (data, stmt, rhs);
1876           else
1877             find_interesting_uses_op (data, *rhs);
1878
1879           if (REFERENCE_CLASS_P (*lhs))
1880             find_interesting_uses_address (data, stmt, lhs);
1881           return;
1882         }
1883       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1884         {
1885           find_interesting_uses_cond (data, stmt);
1886           return;
1887         }
1888
1889       /* TODO -- we should also handle address uses of type
1890
1891          memory = call (whatever);
1892
1893          and
1894
1895          call (memory).  */
1896     }
1897
1898   if (gimple_code (stmt) == GIMPLE_PHI
1899       && gimple_bb (stmt) == data->current_loop->header)
1900     {
1901       iv = get_iv (data, PHI_RESULT (stmt));
1902
1903       if (iv && !integer_zerop (iv->step))
1904         return;
1905     }
1906
1907   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1908     {
1909       op = USE_FROM_PTR (use_p);
1910
1911       if (TREE_CODE (op) != SSA_NAME)
1912         continue;
1913
1914       iv = get_iv (data, op);
1915       if (!iv)
1916         continue;
1917
1918       find_interesting_uses_op (data, op);
1919     }
1920 }
1921
1922 /* Finds interesting uses of induction variables outside of loops
1923    on loop exit edge EXIT.  */
1924
1925 static void
1926 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1927 {
1928   gimple phi;
1929   gimple_stmt_iterator psi;
1930   tree def;
1931
1932   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1933     {
1934       phi = gsi_stmt (psi);
1935       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1936       if (is_gimple_reg (def))
1937         find_interesting_uses_op (data, def);
1938     }
1939 }
1940
1941 /* Finds uses of the induction variables that are interesting.  */
1942
1943 static void
1944 find_interesting_uses (struct ivopts_data *data)
1945 {
1946   basic_block bb;
1947   gimple_stmt_iterator bsi;
1948   basic_block *body = get_loop_body (data->current_loop);
1949   unsigned i;
1950   struct version_info *info;
1951   edge e;
1952
1953   if (dump_file && (dump_flags & TDF_DETAILS))
1954     fprintf (dump_file, "Uses:\n\n");
1955
1956   for (i = 0; i < data->current_loop->num_nodes; i++)
1957     {
1958       edge_iterator ei;
1959       bb = body[i];
1960
1961       FOR_EACH_EDGE (e, ei, bb->succs)
1962         if (e->dest != EXIT_BLOCK_PTR
1963             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1964           find_interesting_uses_outside (data, e);
1965
1966       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1967         find_interesting_uses_stmt (data, gsi_stmt (bsi));
1968       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1969         if (!is_gimple_debug (gsi_stmt (bsi)))
1970           find_interesting_uses_stmt (data, gsi_stmt (bsi));
1971     }
1972
1973   if (dump_file && (dump_flags & TDF_DETAILS))
1974     {
1975       bitmap_iterator bi;
1976
1977       fprintf (dump_file, "\n");
1978
1979       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1980         {
1981           info = ver_info (data, i);
1982           if (info->inv_id)
1983             {
1984               fprintf (dump_file, "  ");
1985               print_generic_expr (dump_file, info->name, TDF_SLIM);
1986               fprintf (dump_file, " is invariant (%d)%s\n",
1987                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1988             }
1989         }
1990
1991       fprintf (dump_file, "\n");
1992     }
1993
1994   free (body);
1995 }
1996
1997 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1998    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1999    we are at the top-level of the processed address.  */
2000
2001 static tree
2002 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2003                 unsigned HOST_WIDE_INT *offset)
2004 {
2005   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2006   enum tree_code code;
2007   tree type, orig_type = TREE_TYPE (expr);
2008   unsigned HOST_WIDE_INT off0, off1, st;
2009   tree orig_expr = expr;
2010
2011   STRIP_NOPS (expr);
2012
2013   type = TREE_TYPE (expr);
2014   code = TREE_CODE (expr);
2015   *offset = 0;
2016
2017   switch (code)
2018     {
2019     case INTEGER_CST:
2020       if (!cst_and_fits_in_hwi (expr)
2021           || integer_zerop (expr))
2022         return orig_expr;
2023
2024       *offset = int_cst_value (expr);
2025       return build_int_cst (orig_type, 0);
2026
2027     case POINTER_PLUS_EXPR:
2028     case PLUS_EXPR:
2029     case MINUS_EXPR:
2030       op0 = TREE_OPERAND (expr, 0);
2031       op1 = TREE_OPERAND (expr, 1);
2032
2033       op0 = strip_offset_1 (op0, false, false, &off0);
2034       op1 = strip_offset_1 (op1, false, false, &off1);
2035
2036       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2037       if (op0 == TREE_OPERAND (expr, 0)
2038           && op1 == TREE_OPERAND (expr, 1))
2039         return orig_expr;
2040
2041       if (integer_zerop (op1))
2042         expr = op0;
2043       else if (integer_zerop (op0))
2044         {
2045           if (code == MINUS_EXPR)
2046             expr = fold_build1 (NEGATE_EXPR, type, op1);
2047           else
2048             expr = op1;
2049         }
2050       else
2051         expr = fold_build2 (code, type, op0, op1);
2052
2053       return fold_convert (orig_type, expr);
2054
2055     case MULT_EXPR:
2056       op1 = TREE_OPERAND (expr, 1);
2057       if (!cst_and_fits_in_hwi (op1))
2058         return orig_expr;
2059
2060       op0 = TREE_OPERAND (expr, 0);
2061       op0 = strip_offset_1 (op0, false, false, &off0);
2062       if (op0 == TREE_OPERAND (expr, 0))
2063         return orig_expr;
2064
2065       *offset = off0 * int_cst_value (op1);
2066       if (integer_zerop (op0))
2067         expr = op0;
2068       else
2069         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2070
2071       return fold_convert (orig_type, expr);
2072
2073     case ARRAY_REF:
2074     case ARRAY_RANGE_REF:
2075       if (!inside_addr)
2076         return orig_expr;
2077
2078       step = array_ref_element_size (expr);
2079       if (!cst_and_fits_in_hwi (step))
2080         break;
2081
2082       st = int_cst_value (step);
2083       op1 = TREE_OPERAND (expr, 1);
2084       op1 = strip_offset_1 (op1, false, false, &off1);
2085       *offset = off1 * st;
2086
2087       if (top_compref
2088           && integer_zerop (op1))
2089         {
2090           /* Strip the component reference completely.  */
2091           op0 = TREE_OPERAND (expr, 0);
2092           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2093           *offset += off0;
2094           return op0;
2095         }
2096       break;
2097
2098     case COMPONENT_REF:
2099       if (!inside_addr)
2100         return orig_expr;
2101
2102       tmp = component_ref_field_offset (expr);
2103       if (top_compref
2104           && cst_and_fits_in_hwi (tmp))
2105         {
2106           /* Strip the component reference completely.  */
2107           op0 = TREE_OPERAND (expr, 0);
2108           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2109           *offset = off0 + int_cst_value (tmp);
2110           return op0;
2111         }
2112       break;
2113
2114     case ADDR_EXPR:
2115       op0 = TREE_OPERAND (expr, 0);
2116       op0 = strip_offset_1 (op0, true, true, &off0);
2117       *offset += off0;
2118
2119       if (op0 == TREE_OPERAND (expr, 0))
2120         return orig_expr;
2121
2122       expr = build_fold_addr_expr (op0);
2123       return fold_convert (orig_type, expr);
2124
2125     case MEM_REF:
2126       /* ???  Offset operand?  */
2127       inside_addr = false;
2128       break;
2129
2130     default:
2131       return orig_expr;
2132     }
2133
2134   /* Default handling of expressions for that we want to recurse into
2135      the first operand.  */
2136   op0 = TREE_OPERAND (expr, 0);
2137   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2138   *offset += off0;
2139
2140   if (op0 == TREE_OPERAND (expr, 0)
2141       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2142     return orig_expr;
2143
2144   expr = copy_node (expr);
2145   TREE_OPERAND (expr, 0) = op0;
2146   if (op1)
2147     TREE_OPERAND (expr, 1) = op1;
2148
2149   /* Inside address, we might strip the top level component references,
2150      thus changing type of the expression.  Handling of ADDR_EXPR
2151      will fix that.  */
2152   expr = fold_convert (orig_type, expr);
2153
2154   return expr;
2155 }
2156
2157 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2158
2159 static tree
2160 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2161 {
2162   return strip_offset_1 (expr, false, false, offset);
2163 }
2164
2165 /* Returns variant of TYPE that can be used as base for different uses.
2166    We return unsigned type with the same precision, which avoids problems
2167    with overflows.  */
2168
2169 static tree
2170 generic_type_for (tree type)
2171 {
2172   if (POINTER_TYPE_P (type))
2173     return unsigned_type_for (type);
2174
2175   if (TYPE_UNSIGNED (type))
2176     return type;
2177
2178   return unsigned_type_for (type);
2179 }
2180
2181 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2182    the bitmap to that we should store it.  */
2183
2184 static struct ivopts_data *fd_ivopts_data;
2185 static tree
2186 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2187 {
2188   bitmap *depends_on = (bitmap *) data;
2189   struct version_info *info;
2190
2191   if (TREE_CODE (*expr_p) != SSA_NAME)
2192     return NULL_TREE;
2193   info = name_info (fd_ivopts_data, *expr_p);
2194
2195   if (!info->inv_id || info->has_nonlin_use)
2196     return NULL_TREE;
2197
2198   if (!*depends_on)
2199     *depends_on = BITMAP_ALLOC (NULL);
2200   bitmap_set_bit (*depends_on, info->inv_id);
2201
2202   return NULL_TREE;
2203 }
2204
2205 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2206    position to POS.  If USE is not NULL, the candidate is set as related to
2207    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2208    replacement of the final value of the iv by a direct computation.  */
2209
2210 static struct iv_cand *
2211 add_candidate_1 (struct ivopts_data *data,
2212                  tree base, tree step, bool important, enum iv_position pos,
2213                  struct iv_use *use, gimple incremented_at)
2214 {
2215   unsigned i;
2216   struct iv_cand *cand = NULL;
2217   tree type, orig_type;
2218
2219   if (base)
2220     {
2221       orig_type = TREE_TYPE (base);
2222       type = generic_type_for (orig_type);
2223       if (type != orig_type)
2224         {
2225           base = fold_convert (type, base);
2226           step = fold_convert (type, step);
2227         }
2228     }
2229
2230   for (i = 0; i < n_iv_cands (data); i++)
2231     {
2232       cand = iv_cand (data, i);
2233
2234       if (cand->pos != pos)
2235         continue;
2236
2237       if (cand->incremented_at != incremented_at
2238           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2239               && cand->ainc_use != use))
2240         continue;
2241
2242       if (!cand->iv)
2243         {
2244           if (!base && !step)
2245             break;
2246
2247           continue;
2248         }
2249
2250       if (!base && !step)
2251         continue;
2252
2253       if (operand_equal_p (base, cand->iv->base, 0)
2254           && operand_equal_p (step, cand->iv->step, 0)
2255           && (TYPE_PRECISION (TREE_TYPE (base))
2256               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2257         break;
2258     }
2259
2260   if (i == n_iv_cands (data))
2261     {
2262       cand = XCNEW (struct iv_cand);
2263       cand->id = i;
2264
2265       if (!base && !step)
2266         cand->iv = NULL;
2267       else
2268         cand->iv = alloc_iv (base, step);
2269
2270       cand->pos = pos;
2271       if (pos != IP_ORIGINAL && cand->iv)
2272         {
2273           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2274           cand->var_after = cand->var_before;
2275         }
2276       cand->important = important;
2277       cand->incremented_at = incremented_at;
2278       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2279
2280       if (step
2281           && TREE_CODE (step) != INTEGER_CST)
2282         {
2283           fd_ivopts_data = data;
2284           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2285         }
2286
2287       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2288         cand->ainc_use = use;
2289       else
2290         cand->ainc_use = NULL;
2291
2292       if (dump_file && (dump_flags & TDF_DETAILS))
2293         dump_cand (dump_file, cand);
2294     }
2295
2296   if (important && !cand->important)
2297     {
2298       cand->important = true;
2299       if (dump_file && (dump_flags & TDF_DETAILS))
2300         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2301     }
2302
2303   if (use)
2304     {
2305       bitmap_set_bit (use->related_cands, i);
2306       if (dump_file && (dump_flags & TDF_DETAILS))
2307         fprintf (dump_file, "Candidate %d is related to use %d\n",
2308                  cand->id, use->id);
2309     }
2310
2311   return cand;
2312 }
2313
2314 /* Returns true if incrementing the induction variable at the end of the LOOP
2315    is allowed.
2316
2317    The purpose is to avoid splitting latch edge with a biv increment, thus
2318    creating a jump, possibly confusing other optimization passes and leaving
2319    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2320    is not available (so we do not have a better alternative), or if the latch
2321    edge is already nonempty.  */
2322
2323 static bool
2324 allow_ip_end_pos_p (struct loop *loop)
2325 {
2326   if (!ip_normal_pos (loop))
2327     return true;
2328
2329   if (!empty_block_p (ip_end_pos (loop)))
2330     return true;
2331
2332   return false;
2333 }
2334
2335 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2336    Important field is set to IMPORTANT.  */
2337
2338 static void
2339 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2340                         bool important, struct iv_use *use)
2341 {
2342   basic_block use_bb = gimple_bb (use->stmt);
2343   enum machine_mode mem_mode;
2344   unsigned HOST_WIDE_INT cstepi;
2345
2346   /* If we insert the increment in any position other than the standard
2347      ones, we must ensure that it is incremented once per iteration.
2348      It must not be in an inner nested loop, or one side of an if
2349      statement.  */
2350   if (use_bb->loop_father != data->current_loop
2351       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2352       || stmt_could_throw_p (use->stmt)
2353       || !cst_and_fits_in_hwi (step))
2354     return;
2355
2356   cstepi = int_cst_value (step);
2357
2358   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2359   if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2360       || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2361     {
2362       enum tree_code code = MINUS_EXPR;
2363       tree new_base;
2364       tree new_step = step;
2365
2366       if (POINTER_TYPE_P (TREE_TYPE (base)))
2367         {
2368           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2369           code = POINTER_PLUS_EXPR;
2370         }
2371       else
2372         new_step = fold_convert (TREE_TYPE (base), new_step);
2373       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2374       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2375                        use->stmt);
2376     }
2377   if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2378       || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2379     {
2380       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2381                        use->stmt);
2382     }
2383 }
2384
2385 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2386    position to POS.  If USE is not NULL, the candidate is set as related to
2387    it.  The candidate computation is scheduled on all available positions.  */
2388
2389 static void
2390 add_candidate (struct ivopts_data *data,
2391                tree base, tree step, bool important, struct iv_use *use)
2392 {
2393   if (ip_normal_pos (data->current_loop))
2394     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2395   if (ip_end_pos (data->current_loop)
2396       && allow_ip_end_pos_p (data->current_loop))
2397     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2398
2399   if (use != NULL && use->type == USE_ADDRESS)
2400     add_autoinc_candidates (data, base, step, important, use);
2401 }
2402
2403 /* Add a standard "0 + 1 * iteration" iv candidate for a
2404    type with SIZE bits.  */
2405
2406 static void
2407 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2408                                      unsigned int size)
2409 {
2410   tree type = lang_hooks.types.type_for_size (size, true);
2411   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2412                  true, NULL);
2413 }
2414
2415 /* Adds standard iv candidates.  */
2416
2417 static void
2418 add_standard_iv_candidates (struct ivopts_data *data)
2419 {
2420   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2421
2422   /* The same for a double-integer type if it is still fast enough.  */
2423   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2424     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2425 }
2426
2427
2428 /* Adds candidates bases on the old induction variable IV.  */
2429
2430 static void
2431 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2432 {
2433   gimple phi;
2434   tree def;
2435   struct iv_cand *cand;
2436
2437   add_candidate (data, iv->base, iv->step, true, NULL);
2438
2439   /* The same, but with initial value zero.  */
2440   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2441     add_candidate (data, size_int (0), iv->step, true, NULL);
2442   else
2443     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2444                    iv->step, true, NULL);
2445
2446   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2447   if (gimple_code (phi) == GIMPLE_PHI)
2448     {
2449       /* Additionally record the possibility of leaving the original iv
2450          untouched.  */
2451       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2452       cand = add_candidate_1 (data,
2453                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2454                               SSA_NAME_DEF_STMT (def));
2455       cand->var_before = iv->ssa_name;
2456       cand->var_after = def;
2457     }
2458 }
2459
2460 /* Adds candidates based on the old induction variables.  */
2461
2462 static void
2463 add_old_ivs_candidates (struct ivopts_data *data)
2464 {
2465   unsigned i;
2466   struct iv *iv;
2467   bitmap_iterator bi;
2468
2469   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2470     {
2471       iv = ver_info (data, i)->iv;
2472       if (iv && iv->biv_p && !integer_zerop (iv->step))
2473         add_old_iv_candidates (data, iv);
2474     }
2475 }
2476
2477 /* Adds candidates based on the value of the induction variable IV and USE.  */
2478
2479 static void
2480 add_iv_value_candidates (struct ivopts_data *data,
2481                          struct iv *iv, struct iv_use *use)
2482 {
2483   unsigned HOST_WIDE_INT offset;
2484   tree base;
2485   tree basetype;
2486
2487   add_candidate (data, iv->base, iv->step, false, use);
2488
2489   /* The same, but with initial value zero.  Make such variable important,
2490      since it is generic enough so that possibly many uses may be based
2491      on it.  */
2492   basetype = TREE_TYPE (iv->base);
2493   if (POINTER_TYPE_P (basetype))
2494     basetype = sizetype;
2495   add_candidate (data, build_int_cst (basetype, 0),
2496                  iv->step, true, use);
2497
2498   /* Third, try removing the constant offset.  Make sure to even
2499      add a candidate for &a[0] vs. (T *)&a.  */
2500   base = strip_offset (iv->base, &offset);
2501   if (offset
2502       || base != iv->base)
2503     add_candidate (data, base, iv->step, false, use);
2504 }
2505
2506 /* Adds candidates based on the uses.  */
2507
2508 static void
2509 add_derived_ivs_candidates (struct ivopts_data *data)
2510 {
2511   unsigned i;
2512
2513   for (i = 0; i < n_iv_uses (data); i++)
2514     {
2515       struct iv_use *use = iv_use (data, i);
2516
2517       if (!use)
2518         continue;
2519
2520       switch (use->type)
2521         {
2522         case USE_NONLINEAR_EXPR:
2523         case USE_COMPARE:
2524         case USE_ADDRESS:
2525           /* Just add the ivs based on the value of the iv used here.  */
2526           add_iv_value_candidates (data, use->iv, use);
2527           break;
2528
2529         default:
2530           gcc_unreachable ();
2531         }
2532     }
2533 }
2534
2535 /* Record important candidates and add them to related_cands bitmaps
2536    if needed.  */
2537
2538 static void
2539 record_important_candidates (struct ivopts_data *data)
2540 {
2541   unsigned i;
2542   struct iv_use *use;
2543
2544   for (i = 0; i < n_iv_cands (data); i++)
2545     {
2546       struct iv_cand *cand = iv_cand (data, i);
2547
2548       if (cand->important)
2549         bitmap_set_bit (data->important_candidates, i);
2550     }
2551
2552   data->consider_all_candidates = (n_iv_cands (data)
2553                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2554
2555   if (data->consider_all_candidates)
2556     {
2557       /* We will not need "related_cands" bitmaps in this case,
2558          so release them to decrease peak memory consumption.  */
2559       for (i = 0; i < n_iv_uses (data); i++)
2560         {
2561           use = iv_use (data, i);
2562           BITMAP_FREE (use->related_cands);
2563         }
2564     }
2565   else
2566     {
2567       /* Add important candidates to the related_cands bitmaps.  */
2568       for (i = 0; i < n_iv_uses (data); i++)
2569         bitmap_ior_into (iv_use (data, i)->related_cands,
2570                          data->important_candidates);
2571     }
2572 }
2573
2574 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2575    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2576    we allocate a simple list to every use.  */
2577
2578 static void
2579 alloc_use_cost_map (struct ivopts_data *data)
2580 {
2581   unsigned i, size, s, j;
2582
2583   for (i = 0; i < n_iv_uses (data); i++)
2584     {
2585       struct iv_use *use = iv_use (data, i);
2586       bitmap_iterator bi;
2587
2588       if (data->consider_all_candidates)
2589         size = n_iv_cands (data);
2590       else
2591         {
2592           s = 0;
2593           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2594             {
2595               s++;
2596             }
2597
2598           /* Round up to the power of two, so that moduling by it is fast.  */
2599           for (size = 1; size < s; size <<= 1)
2600             continue;
2601         }
2602
2603       use->n_map_members = size;
2604       use->cost_map = XCNEWVEC (struct cost_pair, size);
2605     }
2606 }
2607
2608 /* Returns description of computation cost of expression whose runtime
2609    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2610
2611 static comp_cost
2612 new_cost (unsigned runtime, unsigned complexity)
2613 {
2614   comp_cost cost;
2615
2616   cost.cost = runtime;
2617   cost.complexity = complexity;
2618
2619   return cost;
2620 }
2621
2622 /* Adds costs COST1 and COST2.  */
2623
2624 static comp_cost
2625 add_costs (comp_cost cost1, comp_cost cost2)
2626 {
2627   cost1.cost += cost2.cost;
2628   cost1.complexity += cost2.complexity;
2629
2630   return cost1;
2631 }
2632 /* Subtracts costs COST1 and COST2.  */
2633
2634 static comp_cost
2635 sub_costs (comp_cost cost1, comp_cost cost2)
2636 {
2637   cost1.cost -= cost2.cost;
2638   cost1.complexity -= cost2.complexity;
2639
2640   return cost1;
2641 }
2642
2643 /* Returns a negative number if COST1 < COST2, a positive number if
2644    COST1 > COST2, and 0 if COST1 = COST2.  */
2645
2646 static int
2647 compare_costs (comp_cost cost1, comp_cost cost2)
2648 {
2649   if (cost1.cost == cost2.cost)
2650     return cost1.complexity - cost2.complexity;
2651
2652   return cost1.cost - cost2.cost;
2653 }
2654
2655 /* Returns true if COST is infinite.  */
2656
2657 static bool
2658 infinite_cost_p (comp_cost cost)
2659 {
2660   return cost.cost == INFTY;
2661 }
2662
2663 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2664    on invariants DEPENDS_ON and that the value used in expressing it
2665    is VALUE.  */
2666
2667 static void
2668 set_use_iv_cost (struct ivopts_data *data,
2669                  struct iv_use *use, struct iv_cand *cand,
2670                  comp_cost cost, bitmap depends_on, tree value,
2671                  int inv_expr_id)
2672 {
2673   unsigned i, s;
2674
2675   if (infinite_cost_p (cost))
2676     {
2677       BITMAP_FREE (depends_on);
2678       return;
2679     }
2680
2681   if (data->consider_all_candidates)
2682     {
2683       use->cost_map[cand->id].cand = cand;
2684       use->cost_map[cand->id].cost = cost;
2685       use->cost_map[cand->id].depends_on = depends_on;
2686       use->cost_map[cand->id].value = value;
2687       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2688       return;
2689     }
2690
2691   /* n_map_members is a power of two, so this computes modulo.  */
2692   s = cand->id & (use->n_map_members - 1);
2693   for (i = s; i < use->n_map_members; i++)
2694     if (!use->cost_map[i].cand)
2695       goto found;
2696   for (i = 0; i < s; i++)
2697     if (!use->cost_map[i].cand)
2698       goto found;
2699
2700   gcc_unreachable ();
2701
2702 found:
2703   use->cost_map[i].cand = cand;
2704   use->cost_map[i].cost = cost;
2705   use->cost_map[i].depends_on = depends_on;
2706   use->cost_map[i].value = value;
2707   use->cost_map[i].inv_expr_id = inv_expr_id;
2708 }
2709
2710 /* Gets cost of (USE, CANDIDATE) pair.  */
2711
2712 static struct cost_pair *
2713 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2714                  struct iv_cand *cand)
2715 {
2716   unsigned i, s;
2717   struct cost_pair *ret;
2718
2719   if (!cand)
2720     return NULL;
2721
2722   if (data->consider_all_candidates)
2723     {
2724       ret = use->cost_map + cand->id;
2725       if (!ret->cand)
2726         return NULL;
2727
2728       return ret;
2729     }
2730
2731   /* n_map_members is a power of two, so this computes modulo.  */
2732   s = cand->id & (use->n_map_members - 1);
2733   for (i = s; i < use->n_map_members; i++)
2734     if (use->cost_map[i].cand == cand)
2735       return use->cost_map + i;
2736
2737   for (i = 0; i < s; i++)
2738     if (use->cost_map[i].cand == cand)
2739       return use->cost_map + i;
2740
2741   return NULL;
2742 }
2743
2744 /* Returns estimate on cost of computing SEQ.  */
2745
2746 static unsigned
2747 seq_cost (rtx seq, bool speed)
2748 {
2749   unsigned cost = 0;
2750   rtx set;
2751
2752   for (; seq; seq = NEXT_INSN (seq))
2753     {
2754       set = single_set (seq);
2755       if (set)
2756         cost += rtx_cost (SET_SRC (set), SET, speed);
2757       else
2758         cost++;
2759     }
2760
2761   return cost;
2762 }
2763
2764 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2765 static rtx
2766 produce_memory_decl_rtl (tree obj, int *regno)
2767 {
2768   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2769   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2770   rtx x;
2771
2772   gcc_assert (obj);
2773   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2774     {
2775       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2776       x = gen_rtx_SYMBOL_REF (address_mode, name);
2777       SET_SYMBOL_REF_DECL (x, obj);
2778       x = gen_rtx_MEM (DECL_MODE (obj), x);
2779       set_mem_addr_space (x, as);
2780       targetm.encode_section_info (obj, x, true);
2781     }
2782   else
2783     {
2784       x = gen_raw_REG (address_mode, (*regno)++);
2785       x = gen_rtx_MEM (DECL_MODE (obj), x);
2786       set_mem_addr_space (x, as);
2787     }
2788
2789   return x;
2790 }
2791
2792 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2793    walk_tree.  DATA contains the actual fake register number.  */
2794
2795 static tree
2796 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2797 {
2798   tree obj = NULL_TREE;
2799   rtx x = NULL_RTX;
2800   int *regno = (int *) data;
2801
2802   switch (TREE_CODE (*expr_p))
2803     {
2804     case ADDR_EXPR:
2805       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2806            handled_component_p (*expr_p);
2807            expr_p = &TREE_OPERAND (*expr_p, 0))
2808         continue;
2809       obj = *expr_p;
2810       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2811         x = produce_memory_decl_rtl (obj, regno);
2812       break;
2813
2814     case SSA_NAME:
2815       *ws = 0;
2816       obj = SSA_NAME_VAR (*expr_p);
2817       if (!DECL_RTL_SET_P (obj))
2818         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2819       break;
2820
2821     case VAR_DECL:
2822     case PARM_DECL:
2823     case RESULT_DECL:
2824       *ws = 0;
2825       obj = *expr_p;
2826
2827       if (DECL_RTL_SET_P (obj))
2828         break;
2829
2830       if (DECL_MODE (obj) == BLKmode)
2831         x = produce_memory_decl_rtl (obj, regno);
2832       else
2833         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2834
2835       break;
2836
2837     default:
2838       break;
2839     }
2840
2841   if (x)
2842     {
2843       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2844       SET_DECL_RTL (obj, x);
2845     }
2846
2847   return NULL_TREE;
2848 }
2849
2850 /* Determines cost of the computation of EXPR.  */
2851
2852 static unsigned
2853 computation_cost (tree expr, bool speed)
2854 {
2855   rtx seq, rslt;
2856   tree type = TREE_TYPE (expr);
2857   unsigned cost;
2858   /* Avoid using hard regs in ways which may be unsupported.  */
2859   int regno = LAST_VIRTUAL_REGISTER + 1;
2860   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2861   enum node_frequency real_frequency = node->frequency;
2862
2863   node->frequency = NODE_FREQUENCY_NORMAL;
2864   crtl->maybe_hot_insn_p = speed;
2865   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2866   start_sequence ();
2867   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2868   seq = get_insns ();
2869   end_sequence ();
2870   default_rtl_profile ();
2871   node->frequency = real_frequency;
2872
2873   cost = seq_cost (seq, speed);
2874   if (MEM_P (rslt))
2875     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2876                           TYPE_ADDR_SPACE (type), speed);
2877   else if (!REG_P (rslt))
2878     cost += rtx_cost (rslt, SET, speed);
2879
2880   return cost;
2881 }
2882
2883 /* Returns variable containing the value of candidate CAND at statement AT.  */
2884
2885 static tree
2886 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2887 {
2888   if (stmt_after_increment (loop, cand, stmt))
2889     return cand->var_after;
2890   else
2891     return cand->var_before;
2892 }
2893
2894 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2895    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2896
2897 int
2898 tree_int_cst_sign_bit (const_tree t)
2899 {
2900   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2901   unsigned HOST_WIDE_INT w;
2902
2903   if (bitno < HOST_BITS_PER_WIDE_INT)
2904     w = TREE_INT_CST_LOW (t);
2905   else
2906     {
2907       w = TREE_INT_CST_HIGH (t);
2908       bitno -= HOST_BITS_PER_WIDE_INT;
2909     }
2910
2911   return (w >> bitno) & 1;
2912 }
2913
2914 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2915    same precision that is at least as wide as the precision of TYPE, stores
2916    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2917    type of A and B.  */
2918
2919 static tree
2920 determine_common_wider_type (tree *a, tree *b)
2921 {
2922   tree wider_type = NULL;
2923   tree suba, subb;
2924   tree atype = TREE_TYPE (*a);
2925
2926   if (CONVERT_EXPR_P (*a))
2927     {
2928       suba = TREE_OPERAND (*a, 0);
2929       wider_type = TREE_TYPE (suba);
2930       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2931         return atype;
2932     }
2933   else
2934     return atype;
2935
2936   if (CONVERT_EXPR_P (*b))
2937     {
2938       subb = TREE_OPERAND (*b, 0);
2939       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2940         return atype;
2941     }
2942   else
2943     return atype;
2944
2945   *a = suba;
2946   *b = subb;
2947   return wider_type;
2948 }
2949
2950 /* Determines the expression by that USE is expressed from induction variable
2951    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2952    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2953
2954 static bool
2955 get_computation_aff (struct loop *loop,
2956                      struct iv_use *use, struct iv_cand *cand, gimple at,
2957                      struct affine_tree_combination *aff)
2958 {
2959   tree ubase = use->iv->base;
2960   tree ustep = use->iv->step;
2961   tree cbase = cand->iv->base;
2962   tree cstep = cand->iv->step, cstep_common;
2963   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2964   tree common_type, var;
2965   tree uutype;
2966   aff_tree cbase_aff, var_aff;
2967   double_int rat;
2968
2969   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2970     {
2971       /* We do not have a precision to express the values of use.  */
2972       return false;
2973     }
2974
2975   var = var_at_stmt (loop, cand, at);
2976   uutype = unsigned_type_for (utype);
2977
2978   /* If the conversion is not noop, perform it.  */
2979   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2980     {
2981       cstep = fold_convert (uutype, cstep);
2982       cbase = fold_convert (uutype, cbase);
2983       var = fold_convert (uutype, var);
2984     }
2985
2986   if (!constant_multiple_of (ustep, cstep, &rat))
2987     return false;
2988
2989   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2990      type, we achieve better folding by computing their difference in this
2991      wider type, and cast the result to UUTYPE.  We do not need to worry about
2992      overflows, as all the arithmetics will in the end be performed in UUTYPE
2993      anyway.  */
2994   common_type = determine_common_wider_type (&ubase, &cbase);
2995
2996   /* use = ubase - ratio * cbase + ratio * var.  */
2997   tree_to_aff_combination (ubase, common_type, aff);
2998   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2999   tree_to_aff_combination (var, uutype, &var_aff);
3000
3001   /* We need to shift the value if we are after the increment.  */
3002   if (stmt_after_increment (loop, cand, at))
3003     {
3004       aff_tree cstep_aff;
3005
3006       if (common_type != uutype)
3007         cstep_common = fold_convert (common_type, cstep);
3008       else
3009         cstep_common = cstep;
3010
3011       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3012       aff_combination_add (&cbase_aff, &cstep_aff);
3013     }
3014
3015   aff_combination_scale (&cbase_aff, double_int_neg (rat));
3016   aff_combination_add (aff, &cbase_aff);
3017   if (common_type != uutype)
3018     aff_combination_convert (aff, uutype);
3019
3020   aff_combination_scale (&var_aff, rat);
3021   aff_combination_add (aff, &var_aff);
3022
3023   return true;
3024 }
3025
3026 /* Determines the expression by that USE is expressed from induction variable
3027    CAND at statement AT in LOOP.  The computation is unshared.  */
3028
3029 static tree
3030 get_computation_at (struct loop *loop,
3031                     struct iv_use *use, struct iv_cand *cand, gimple at)
3032 {
3033   aff_tree aff;
3034   tree type = TREE_TYPE (use->iv->base);
3035
3036   if (!get_computation_aff (loop, use, cand, at, &aff))
3037     return NULL_TREE;
3038   unshare_aff_combination (&aff);
3039   return fold_convert (type, aff_combination_to_tree (&aff));
3040 }
3041
3042 /* Determines the expression by that USE is expressed from induction variable
3043    CAND in LOOP.  The computation is unshared.  */
3044
3045 static tree
3046 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3047 {
3048   return get_computation_at (loop, use, cand, use->stmt);
3049 }
3050
3051 /* Adjust the cost COST for being in loop setup rather than loop body.
3052    If we're optimizing for space, the loop setup overhead is constant;
3053    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3054 static unsigned
3055 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3056 {
3057   if (cost == INFTY)
3058     return cost;
3059   else if (optimize_loop_for_speed_p (data->current_loop))
3060     return cost / avg_loop_niter (data->current_loop);
3061   else
3062     return cost;
3063 }
3064
3065 /* Returns cost of addition in MODE.  */
3066
3067 static unsigned
3068 add_cost (enum machine_mode mode, bool speed)
3069 {
3070   static unsigned costs[NUM_MACHINE_MODES];
3071   rtx seq;
3072   unsigned cost;
3073
3074   if (costs[mode])
3075     return costs[mode];
3076
3077   start_sequence ();
3078   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3079                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3080                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3081                  NULL_RTX);
3082   seq = get_insns ();
3083   end_sequence ();
3084
3085   cost = seq_cost (seq, speed);
3086   if (!cost)
3087     cost = 1;
3088
3089   costs[mode] = cost;
3090
3091   if (dump_file && (dump_flags & TDF_DETAILS))
3092     fprintf (dump_file, "Addition in %s costs %d\n",
3093              GET_MODE_NAME (mode), cost);
3094   return cost;
3095 }
3096
3097 /* Entry in a hashtable of already known costs for multiplication.  */
3098 struct mbc_entry
3099 {
3100   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3101   enum machine_mode mode;       /* In mode.  */
3102   unsigned cost;                /* The cost.  */
3103 };
3104
3105 /* Counts hash value for the ENTRY.  */
3106
3107 static hashval_t
3108 mbc_entry_hash (const void *entry)
3109 {
3110   const struct mbc_entry *e = (const struct mbc_entry *) entry;
3111
3112   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3113 }
3114
3115 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3116
3117 static int
3118 mbc_entry_eq (const void *entry1, const void *entry2)
3119 {
3120   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3121   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3122
3123   return (e1->mode == e2->mode
3124           && e1->cst == e2->cst);
3125 }
3126
3127 /* Returns cost of multiplication by constant CST in MODE.  */
3128
3129 unsigned
3130 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3131 {
3132   static htab_t costs;
3133   struct mbc_entry **cached, act;
3134   rtx seq;
3135   unsigned cost;
3136
3137   if (!costs)
3138     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3139
3140   act.mode = mode;
3141   act.cst = cst;
3142   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3143   if (*cached)
3144     return (*cached)->cost;
3145
3146   *cached = XNEW (struct mbc_entry);
3147   (*cached)->mode = mode;
3148   (*cached)->cst = cst;
3149
3150   start_sequence ();
3151   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3152                gen_int_mode (cst, mode), NULL_RTX, 0);
3153   seq = get_insns ();
3154   end_sequence ();
3155
3156   cost = seq_cost (seq, speed);
3157
3158   if (dump_file && (dump_flags & TDF_DETAILS))
3159     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3160              (int) cst, GET_MODE_NAME (mode), cost);
3161
3162   (*cached)->cost = cost;
3163
3164   return cost;
3165 }
3166
3167 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3168    validity for a memory reference accessing memory of mode MODE in
3169    address space AS.  */
3170
3171 DEF_VEC_P (sbitmap);
3172 DEF_VEC_ALLOC_P (sbitmap, heap);
3173
3174 bool
3175 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3176                                  addr_space_t as)
3177 {
3178 #define MAX_RATIO 128
3179   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3180   static VEC (sbitmap, heap) *valid_mult_list;
3181   sbitmap valid_mult;
3182
3183   if (data_index >= VEC_length (sbitmap, valid_mult_list))
3184     VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3185
3186   valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3187   if (!valid_mult)
3188     {
3189       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3190       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3191       rtx addr;
3192       HOST_WIDE_INT i;
3193
3194       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3195       sbitmap_zero (valid_mult);
3196       addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3197       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3198         {
3199           XEXP (addr, 1) = gen_int_mode (i, address_mode);
3200           if (memory_address_addr_space_p (mode, addr, as))
3201             SET_BIT (valid_mult, i + MAX_RATIO);
3202         }
3203
3204       if (dump_file && (dump_flags & TDF_DETAILS))
3205         {
3206           fprintf (dump_file, "  allowed multipliers:");
3207           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3208             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3209               fprintf (dump_file, " %d", (int) i);
3210           fprintf (dump_file, "\n");
3211           fprintf (dump_file, "\n");
3212         }
3213
3214       VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3215     }
3216
3217   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3218     return false;
3219
3220   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3221 }
3222
3223 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3224    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3225    variable is omitted.  Compute the cost for a memory reference that accesses
3226    a memory location of mode MEM_MODE in address space AS.
3227
3228    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3229    size of MEM_MODE / RATIO) is available.  To make this determination, we
3230    look at the size of the increment to be made, which is given in CSTEP.
3231    CSTEP may be zero if the step is unknown.
3232    STMT_AFTER_INC is true iff the statement we're looking at is after the
3233    increment of the original biv.
3234
3235    TODO -- there must be some better way.  This all is quite crude.  */
3236
3237 typedef struct
3238 {
3239   HOST_WIDE_INT min_offset, max_offset;
3240   unsigned costs[2][2][2][2];
3241 } *address_cost_data;
3242
3243 DEF_VEC_P (address_cost_data);
3244 DEF_VEC_ALLOC_P (address_cost_data, heap);
3245
3246 static comp_cost
3247 get_address_cost (bool symbol_present, bool var_present,
3248                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3249                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3250                   addr_space_t as, bool speed,
3251                   bool stmt_after_inc, bool *may_autoinc)
3252 {
3253   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3254   static VEC(address_cost_data, heap) *address_cost_data_list;
3255   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3256   address_cost_data data;
3257   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3258   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3259   unsigned cost, acost, complexity;
3260   bool offset_p, ratio_p, autoinc;
3261   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3262   unsigned HOST_WIDE_INT mask;
3263   unsigned bits;
3264
3265   if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3266     VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3267                            data_index + 1);
3268
3269   data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3270   if (!data)
3271     {
3272       HOST_WIDE_INT i;
3273       HOST_WIDE_INT rat, off = 0;
3274       int old_cse_not_expected, width;
3275       unsigned sym_p, var_p, off_p, rat_p, add_c;
3276       rtx seq, addr, base;
3277       rtx reg0, reg1;
3278
3279       data = (address_cost_data) xcalloc (1, sizeof (*data));
3280
3281       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3282
3283       width = GET_MODE_BITSIZE (address_mode) - 1;
3284       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3285         width = HOST_BITS_PER_WIDE_INT - 1;
3286       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3287
3288       for (i = width; i >= 0; i--)
3289         {
3290           off = -((HOST_WIDE_INT) 1 << i);
3291           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3292           if (memory_address_addr_space_p (mem_mode, addr, as))
3293             break;
3294         }
3295       data->min_offset = (i == -1? 0 : off);
3296
3297       for (i = width; i >= 0; i--)
3298         {
3299           off = ((HOST_WIDE_INT) 1 << i) - 1;
3300           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3301           if (memory_address_addr_space_p (mem_mode, addr, as))
3302             break;
3303         }
3304       if (i == -1)
3305         off = 0;
3306       data->max_offset = off;
3307
3308       if (dump_file && (dump_flags & TDF_DETAILS))
3309         {
3310           fprintf (dump_file, "get_address_cost:\n");
3311           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3312                    GET_MODE_NAME (mem_mode),
3313                    data->min_offset);
3314           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3315                    GET_MODE_NAME (mem_mode),
3316                    data->max_offset);
3317         }
3318
3319       rat = 1;
3320       for (i = 2; i <= MAX_RATIO; i++)
3321         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3322           {
3323             rat = i;
3324             break;
3325           }
3326
3327       /* Compute the cost of various addressing modes.  */
3328       acost = 0;
3329       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3330       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3331
3332       if (HAVE_PRE_DECREMENT)
3333         {
3334           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3335           has_predec[mem_mode]
3336             = memory_address_addr_space_p (mem_mode, addr, as);
3337         }
3338       if (HAVE_POST_DECREMENT)
3339         {
3340           addr = gen_rtx_POST_DEC (address_mode, reg0);
3341           has_postdec[mem_mode]
3342             = memory_address_addr_space_p (mem_mode, addr, as);
3343         }
3344       if (HAVE_PRE_INCREMENT)
3345         {
3346           addr = gen_rtx_PRE_INC (address_mode, reg0);
3347           has_preinc[mem_mode]
3348             = memory_address_addr_space_p (mem_mode, addr, as);
3349         }
3350       if (HAVE_POST_INCREMENT)
3351         {
3352           addr = gen_rtx_POST_INC (address_mode, reg0);
3353           has_postinc[mem_mode]
3354             = memory_address_addr_space_p (mem_mode, addr, as);
3355         }
3356       for (i = 0; i < 16; i++)
3357         {
3358           sym_p = i & 1;
3359           var_p = (i >> 1) & 1;
3360           off_p = (i >> 2) & 1;
3361           rat_p = (i >> 3) & 1;
3362
3363           addr = reg0;
3364           if (rat_p)
3365             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3366                                    gen_int_mode (rat, address_mode));
3367
3368           if (var_p)
3369             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3370
3371           if (sym_p)
3372             {
3373               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3374               /* ??? We can run into trouble with some backends by presenting
3375                  it with symbols which haven't been properly passed through
3376                  targetm.encode_section_info.  By setting the local bit, we
3377                  enhance the probability of things working.  */
3378               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3379
3380               if (off_p)
3381                 base = gen_rtx_fmt_e (CONST, address_mode,
3382                                       gen_rtx_fmt_ee
3383                                         (PLUS, address_mode, base,
3384                                          gen_int_mode (off, address_mode)));
3385             }
3386           else if (off_p)
3387             base = gen_int_mode (off, address_mode);
3388           else
3389             base = NULL_RTX;
3390
3391           if (base)
3392             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3393
3394           start_sequence ();
3395           /* To avoid splitting addressing modes, pretend that no cse will
3396              follow.  */
3397           old_cse_not_expected = cse_not_expected;
3398           cse_not_expected = true;
3399           addr = memory_address_addr_space (mem_mode, addr, as);
3400           cse_not_expected = old_cse_not_expected;
3401           seq = get_insns ();
3402           end_sequence ();
3403
3404           acost = seq_cost (seq, speed);
3405           acost += address_cost (addr, mem_mode, as, speed);
3406
3407           if (!acost)
3408             acost = 1;
3409           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3410         }
3411
3412       /* On some targets, it is quite expensive to load symbol to a register,
3413          which makes addresses that contain symbols look much more expensive.
3414          However, the symbol will have to be loaded in any case before the
3415          loop (and quite likely we have it in register already), so it does not
3416          make much sense to penalize them too heavily.  So make some final
3417          tweaks for the SYMBOL_PRESENT modes:
3418
3419          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3420          var is cheaper, use this mode with small penalty.
3421          If VAR_PRESENT is true, try whether the mode with
3422          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3423          if this is the case, use it.  */
3424       add_c = add_cost (address_mode, speed);
3425       for (i = 0; i < 8; i++)
3426         {
3427           var_p = i & 1;
3428           off_p = (i >> 1) & 1;
3429           rat_p = (i >> 2) & 1;
3430
3431           acost = data->costs[0][1][off_p][rat_p] + 1;
3432           if (var_p)
3433             acost += add_c;
3434
3435           if (acost < data->costs[1][var_p][off_p][rat_p])
3436             data->costs[1][var_p][off_p][rat_p] = acost;
3437         }
3438
3439       if (dump_file && (dump_flags & TDF_DETAILS))
3440         {
3441           fprintf (dump_file, "Address costs:\n");
3442
3443           for (i = 0; i < 16; i++)
3444             {
3445               sym_p = i & 1;
3446               var_p = (i >> 1) & 1;
3447               off_p = (i >> 2) & 1;
3448               rat_p = (i >> 3) & 1;
3449
3450               fprintf (dump_file, "  ");
3451               if (sym_p)
3452                 fprintf (dump_file, "sym + ");
3453               if (var_p)
3454                 fprintf (dump_file, "var + ");
3455               if (off_p)
3456                 fprintf (dump_file, "cst + ");
3457               if (rat_p)
3458                 fprintf (dump_file, "rat * ");
3459
3460               acost = data->costs[sym_p][var_p][off_p][rat_p];
3461               fprintf (dump_file, "index costs %d\n", acost);
3462             }
3463           if (has_predec[mem_mode] || has_postdec[mem_mode]
3464               || has_preinc[mem_mode] || has_postinc[mem_mode])
3465             fprintf (dump_file, "  May include autoinc/dec\n");
3466           fprintf (dump_file, "\n");
3467         }
3468
3469       VEC_replace (address_cost_data, address_cost_data_list,
3470                    data_index, data);
3471     }
3472
3473   bits = GET_MODE_BITSIZE (address_mode);
3474   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3475   offset &= mask;
3476   if ((offset >> (bits - 1) & 1))
3477     offset |= ~mask;
3478   s_offset = offset;
3479
3480   autoinc = false;
3481   msize = GET_MODE_SIZE (mem_mode);
3482   autoinc_offset = offset;
3483   if (stmt_after_inc)
3484     autoinc_offset += ratio * cstep;
3485   if (symbol_present || var_present || ratio != 1)
3486     autoinc = false;
3487   else if ((has_postinc[mem_mode] && autoinc_offset == 0
3488                && msize == cstep)
3489            || (has_postdec[mem_mode] && autoinc_offset == 0
3490                && msize == -cstep)
3491            || (has_preinc[mem_mode] && autoinc_offset == msize
3492                && msize == cstep)
3493            || (has_predec[mem_mode] && autoinc_offset == -msize
3494                && msize == -cstep))
3495     autoinc = true;
3496
3497   cost = 0;
3498   offset_p = (s_offset != 0
3499               && data->min_offset <= s_offset
3500               && s_offset <= data->max_offset);
3501   ratio_p = (ratio != 1
3502              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3503
3504   if (ratio != 1 && !ratio_p)
3505     cost += multiply_by_cost (ratio, address_mode, speed);
3506
3507   if (s_offset && !offset_p && !symbol_present)
3508     cost += add_cost (address_mode, speed);
3509
3510   if (may_autoinc)
3511     *may_autoinc = autoinc;
3512   acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3513   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3514   return new_cost (cost + acost, complexity);
3515 }
3516
3517  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3518     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3519     calculating the operands of EXPR.  Returns true if successful, and returns
3520     the cost in COST.  */
3521
3522 static bool
3523 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3524                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3525 {
3526   comp_cost res;
3527   tree op1 = TREE_OPERAND (expr, 1);
3528   tree cst = TREE_OPERAND (mult, 1);
3529   tree multop = TREE_OPERAND (mult, 0);
3530   int m = exact_log2 (int_cst_value (cst));
3531   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3532   int sa_cost;
3533
3534   if (!(m >= 0 && m < maxm))
3535     return false;
3536
3537   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3538              ? shiftadd_cost[speed][mode][m]
3539              : (mult == op1
3540                 ? shiftsub1_cost[speed][mode][m]
3541                 : shiftsub0_cost[speed][mode][m]));
3542   res = new_cost (sa_cost, 0);
3543   res = add_costs (res, mult == op1 ? cost0 : cost1);
3544
3545   STRIP_NOPS (multop);
3546   if (!is_gimple_val (multop))
3547     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3548
3549   *cost = res;
3550   return true;
3551 }
3552
3553 /* Estimates cost of forcing expression EXPR into a variable.  */
3554
3555 static comp_cost
3556 force_expr_to_var_cost (tree expr, bool speed)
3557 {
3558   static bool costs_initialized = false;
3559   static unsigned integer_cost [2];
3560   static unsigned symbol_cost [2];
3561   static unsigned address_cost [2];
3562   tree op0, op1;
3563   comp_cost cost0, cost1, cost;
3564   enum machine_mode mode;
3565
3566   if (!costs_initialized)
3567     {
3568       tree type = build_pointer_type (integer_type_node);
3569       tree var, addr;
3570       rtx x;
3571       int i;
3572
3573       var = create_tmp_var_raw (integer_type_node, "test_var");
3574       TREE_STATIC (var) = 1;
3575       x = produce_memory_decl_rtl (var, NULL);
3576       SET_DECL_RTL (var, x);
3577
3578       addr = build1 (ADDR_EXPR, type, var);
3579
3580
3581       for (i = 0; i < 2; i++)
3582         {
3583           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3584                                                              2000), i);
3585
3586           symbol_cost[i] = computation_cost (addr, i) + 1;
3587
3588           address_cost[i]
3589             = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3590                                         addr,
3591                                         build_int_cst (sizetype, 2000)), i) + 1;
3592           if (dump_file && (dump_flags & TDF_DETAILS))
3593             {
3594               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3595               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3596               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3597               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3598               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3599               fprintf (dump_file, "\n");
3600             }
3601         }
3602
3603       costs_initialized = true;
3604     }
3605
3606   STRIP_NOPS (expr);
3607
3608   if (SSA_VAR_P (expr))
3609     return zero_cost;
3610
3611   if (is_gimple_min_invariant (expr))
3612     {
3613       if (TREE_CODE (expr) == INTEGER_CST)
3614         return new_cost (integer_cost [speed], 0);
3615
3616       if (TREE_CODE (expr) == ADDR_EXPR)
3617         {
3618           tree obj = TREE_OPERAND (expr, 0);
3619
3620           if (TREE_CODE (obj) == VAR_DECL
3621               || TREE_CODE (obj) == PARM_DECL
3622               || TREE_CODE (obj) == RESULT_DECL)
3623             return new_cost (symbol_cost [speed], 0);
3624         }
3625
3626       return new_cost (address_cost [speed], 0);
3627     }
3628
3629   switch (TREE_CODE (expr))
3630     {
3631     case POINTER_PLUS_EXPR:
3632     case PLUS_EXPR:
3633     case MINUS_EXPR:
3634     case MULT_EXPR:
3635       op0 = TREE_OPERAND (expr, 0);
3636       op1 = TREE_OPERAND (expr, 1);
3637       STRIP_NOPS (op0);
3638       STRIP_NOPS (op1);
3639
3640       if (is_gimple_val (op0))
3641         cost0 = zero_cost;
3642       else
3643         cost0 = force_expr_to_var_cost (op0, speed);
3644
3645       if (is_gimple_val (op1))
3646         cost1 = zero_cost;
3647       else
3648         cost1 = force_expr_to_var_cost (op1, speed);
3649
3650       break;
3651
3652     case NEGATE_EXPR:
3653       op0 = TREE_OPERAND (expr, 0);
3654       STRIP_NOPS (op0);
3655       op1 = NULL_TREE;
3656
3657       if (is_gimple_val (op0))
3658         cost0 = zero_cost;
3659       else
3660         cost0 = force_expr_to_var_cost (op0, speed);
3661
3662       cost1 = zero_cost;
3663       break;
3664
3665     default:
3666       /* Just an arbitrary value, FIXME.  */
3667       return new_cost (target_spill_cost[speed], 0);
3668     }
3669
3670   mode = TYPE_MODE (TREE_TYPE (expr));
3671   switch (TREE_CODE (expr))
3672     {
3673     case POINTER_PLUS_EXPR:
3674     case PLUS_EXPR:
3675     case MINUS_EXPR:
3676     case NEGATE_EXPR:
3677       cost = new_cost (add_cost (mode, speed), 0);
3678       if (TREE_CODE (expr) != NEGATE_EXPR)
3679         {
3680           tree mult = NULL_TREE;
3681           comp_cost sa_cost;
3682           if (TREE_CODE (op1) == MULT_EXPR)
3683             mult = op1;
3684           else if (TREE_CODE (op0) == MULT_EXPR)
3685             mult = op0;
3686
3687           if (mult != NULL_TREE
3688               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3689               && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed,
3690                                     &sa_cost))
3691             return sa_cost;
3692         }
3693       break;
3694
3695     case MULT_EXPR:
3696       if (cst_and_fits_in_hwi (op0))
3697         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3698       else if (cst_and_fits_in_hwi (op1))
3699         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3700       else
3701         return new_cost (target_spill_cost [speed], 0);
3702       break;
3703
3704     default:
3705       gcc_unreachable ();
3706     }
3707
3708   cost = add_costs (cost, cost0);
3709   cost = add_costs (cost, cost1);
3710
3711   /* Bound the cost by target_spill_cost.  The parts of complicated
3712      computations often are either loop invariant or at least can
3713      be shared between several iv uses, so letting this grow without
3714      limits would not give reasonable results.  */
3715   if (cost.cost > (int) target_spill_cost [speed])
3716     cost.cost = target_spill_cost [speed];
3717
3718   return cost;
3719 }
3720
3721 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3722    invariants the computation depends on.  */
3723
3724 static comp_cost
3725 force_var_cost (struct ivopts_data *data,
3726                 tree expr, bitmap *depends_on)
3727 {
3728   if (depends_on)
3729     {
3730       fd_ivopts_data = data;
3731       walk_tree (&expr, find_depends, depends_on, NULL);
3732     }
3733
3734   return force_expr_to_var_cost (expr, data->speed);
3735 }
3736
3737 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3738    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3739    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3740    invariants the computation depends on.  */
3741
3742 static comp_cost
3743 split_address_cost (struct ivopts_data *data,
3744                     tree addr, bool *symbol_present, bool *var_present,
3745                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3746 {
3747   tree core;
3748   HOST_WIDE_INT bitsize;
3749   HOST_WIDE_INT bitpos;
3750   tree toffset;
3751   enum machine_mode mode;
3752   int unsignedp, volatilep;
3753
3754   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3755                               &unsignedp, &volatilep, false);
3756
3757   if (toffset != 0
3758       || bitpos % BITS_PER_UNIT != 0
3759       || TREE_CODE (core) != VAR_DECL)
3760     {
3761       *symbol_present = false;
3762       *var_present = true;
3763       fd_ivopts_data = data;
3764       walk_tree (&addr, find_depends, depends_on, NULL);
3765       return new_cost (target_spill_cost[data->speed], 0);
3766     }
3767
3768   *offset += bitpos / BITS_PER_UNIT;
3769   if (TREE_STATIC (core)
3770       || DECL_EXTERNAL (core))
3771     {
3772       *symbol_present = true;
3773       *var_present = false;
3774       return zero_cost;
3775     }
3776
3777   *symbol_present = false;
3778   *var_present = true;
3779   return zero_cost;
3780 }
3781
3782 /* Estimates cost of expressing difference of addresses E1 - E2 as
3783    var + symbol + offset.  The value of offset is added to OFFSET,
3784    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3785    part is missing.  DEPENDS_ON is a set of the invariants the computation
3786    depends on.  */
3787
3788 static comp_cost
3789 ptr_difference_cost (struct ivopts_data *data,
3790                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3791                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3792 {
3793   HOST_WIDE_INT diff = 0;
3794   aff_tree aff_e1, aff_e2;
3795   tree type;
3796
3797   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3798
3799   if (ptr_difference_const (e1, e2, &diff))
3800     {
3801       *offset += diff;
3802       *symbol_present = false;
3803       *var_present = false;
3804       return zero_cost;
3805     }
3806
3807   if (integer_zerop (e2))
3808     return split_address_cost (data, TREE_OPERAND (e1, 0),
3809                                symbol_present, var_present, offset, depends_on);
3810
3811   *symbol_present = false;
3812   *var_present = true;
3813
3814   type = signed_type_for (TREE_TYPE (e1));
3815   tree_to_aff_combination (e1, type, &aff_e1);
3816   tree_to_aff_combination (e2, type, &aff_e2);
3817   aff_combination_scale (&aff_e2, double_int_minus_one);
3818   aff_combination_add (&aff_e1, &aff_e2);
3819
3820   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3821 }
3822
3823 /* Estimates cost of expressing difference E1 - E2 as
3824    var + symbol + offset.  The value of offset is added to OFFSET,
3825    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3826    part is missing.  DEPENDS_ON is a set of the invariants the computation
3827    depends on.  */
3828
3829 static comp_cost
3830 difference_cost (struct ivopts_data *data,
3831                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3832                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3833 {
3834   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3835   unsigned HOST_WIDE_INT off1, off2;
3836   aff_tree aff_e1, aff_e2;
3837   tree type;
3838
3839   e1 = strip_offset (e1, &off1);
3840   e2 = strip_offset (e2, &off2);
3841   *offset += off1 - off2;
3842
3843   STRIP_NOPS (e1);
3844   STRIP_NOPS (e2);
3845
3846   if (TREE_CODE (e1) == ADDR_EXPR)
3847     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3848                                 offset, depends_on);
3849   *symbol_present = false;
3850
3851   if (operand_equal_p (e1, e2, 0))
3852     {
3853       *var_present = false;
3854       return zero_cost;
3855     }
3856
3857   *var_present = true;
3858
3859   if (integer_zerop (e2))
3860     return force_var_cost (data, e1, depends_on);
3861
3862   if (integer_zerop (e1))
3863     {
3864       comp_cost cost = force_var_cost (data, e2, depends_on);
3865       cost.cost += multiply_by_cost (-1, mode, data->speed);
3866       return cost;
3867     }
3868
3869   type = signed_type_for (TREE_TYPE (e1));
3870   tree_to_aff_combination (e1, type, &aff_e1);
3871   tree_to_aff_combination (e2, type, &aff_e2);
3872   aff_combination_scale (&aff_e2, double_int_minus_one);
3873   aff_combination_add (&aff_e1, &aff_e2);
3874
3875   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3876 }
3877
3878 /* Returns true if AFF1 and AFF2 are identical.  */
3879
3880 static bool
3881 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3882 {
3883   unsigned i;
3884
3885   if (aff1->n != aff2->n)
3886     return false;
3887
3888   for (i = 0; i < aff1->n; i++)
3889     {
3890       if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3891         return false;
3892
3893       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3894         return false;
3895     }
3896   return true;
3897 }
3898
3899 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3900
3901 static int
3902 get_expr_id (struct ivopts_data *data, tree expr)
3903 {
3904   struct iv_inv_expr_ent ent;
3905   struct iv_inv_expr_ent **slot;
3906
3907   ent.expr = expr;
3908   ent.hash = iterative_hash_expr (expr, 0);
3909   slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3910                                                      &ent, INSERT);
3911   if (*slot)
3912     return (*slot)->id;
3913
3914   *slot = XNEW (struct iv_inv_expr_ent);
3915   (*slot)->expr = expr;
3916   (*slot)->hash = ent.hash;
3917   (*slot)->id = data->inv_expr_id++;
3918   return (*slot)->id;
3919 }
3920
3921 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3922    requires a new compiler generated temporary.  Returns -1 otherwise.
3923    ADDRESS_P is a flag indicating if the expression is for address
3924    computation.  */
3925
3926 static int
3927 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3928                             tree cbase, HOST_WIDE_INT ratio,
3929                             bool address_p)
3930 {
3931   aff_tree ubase_aff, cbase_aff;
3932   tree expr, ub, cb;
3933
3934   STRIP_NOPS (ubase);
3935   STRIP_NOPS (cbase);
3936   ub = ubase;
3937   cb = cbase;
3938
3939   if ((TREE_CODE (ubase) == INTEGER_CST)
3940       && (TREE_CODE (cbase) == INTEGER_CST))
3941     return -1;
3942
3943   /* Strips the constant part. */
3944   if (TREE_CODE (ubase) == PLUS_EXPR
3945       || TREE_CODE (ubase) == MINUS_EXPR
3946       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3947     {
3948       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3949         ubase = TREE_OPERAND (ubase, 0);
3950     }
3951
3952   /* Strips the constant part. */
3953   if (TREE_CODE (cbase) == PLUS_EXPR
3954       || TREE_CODE (cbase) == MINUS_EXPR
3955       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3956     {
3957       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3958         cbase = TREE_OPERAND (cbase, 0);
3959     }
3960
3961   if (address_p)
3962     {
3963       if (((TREE_CODE (ubase) == SSA_NAME)
3964            || (TREE_CODE (ubase) == ADDR_EXPR
3965                && is_gimple_min_invariant (ubase)))
3966           && (TREE_CODE (cbase) == INTEGER_CST))
3967         return -1;
3968
3969       if (((TREE_CODE (cbase) == SSA_NAME)
3970            || (TREE_CODE (cbase) == ADDR_EXPR
3971                && is_gimple_min_invariant (cbase)))
3972           && (TREE_CODE (ubase) == INTEGER_CST))
3973         return -1;
3974     }
3975
3976   if (ratio == 1)
3977     {
3978       if(operand_equal_p (ubase, cbase, 0))
3979         return -1;
3980
3981       if (TREE_CODE (ubase) == ADDR_EXPR
3982           && TREE_CODE (cbase) == ADDR_EXPR)
3983         {
3984           tree usym, csym;
3985
3986           usym = TREE_OPERAND (ubase, 0);
3987           csym = TREE_OPERAND (cbase, 0);
3988           if (TREE_CODE (usym) == ARRAY_REF)
3989             {
3990               tree ind = TREE_OPERAND (usym, 1);
3991               if (TREE_CODE (ind) == INTEGER_CST
3992                   && host_integerp (ind, 0)
3993                   && TREE_INT_CST_LOW (ind) == 0)
3994                 usym = TREE_OPERAND (usym, 0);
3995             }
3996           if (TREE_CODE (csym) == ARRAY_REF)
3997             {
3998               tree ind = TREE_OPERAND (csym, 1);
3999               if (TREE_CODE (ind) == INTEGER_CST
4000                   && host_integerp (ind, 0)
4001                   && TREE_INT_CST_LOW (ind) == 0)
4002                 csym = TREE_OPERAND (csym, 0);
4003             }
4004           if (operand_equal_p (usym, csym, 0))
4005             return -1;
4006         }
4007       /* Now do more complex comparison  */
4008       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4009       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4010       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4011         return -1;
4012     }
4013
4014   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4015   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4016
4017   aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
4018   aff_combination_add (&ubase_aff, &cbase_aff);
4019   expr = aff_combination_to_tree (&ubase_aff);
4020   return get_expr_id (data, expr);
4021 }
4022
4023
4024
4025 /* Determines the cost of the computation by that USE is expressed
4026    from induction variable CAND.  If ADDRESS_P is true, we just need
4027    to create an address from it, otherwise we want to get it into
4028    register.  A set of invariants we depend on is stored in
4029    DEPENDS_ON.  AT is the statement at that the value is computed.
4030    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4031    addressing is likely.  */
4032
4033 static comp_cost
4034 get_computation_cost_at (struct ivopts_data *data,
4035                          struct iv_use *use, struct iv_cand *cand,
4036                          bool address_p, bitmap *depends_on, gimple at,
4037                          bool *can_autoinc,
4038                          int *inv_expr_id)
4039 {
4040   tree ubase = use->iv->base, ustep = use->iv->step;
4041   tree cbase, cstep;
4042   tree utype = TREE_TYPE (ubase), ctype;
4043   unsigned HOST_WIDE_INT cstepi, offset = 0;
4044   HOST_WIDE_INT ratio, aratio;
4045   bool var_present, symbol_present, stmt_is_after_inc;
4046   comp_cost cost;
4047   double_int rat;
4048   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4049
4050   *depends_on = NULL;
4051
4052   /* Only consider real candidates.  */
4053   if (!cand->iv)
4054     return infinite_cost;
4055
4056   cbase = cand->iv->base;
4057   cstep = cand->iv->step;
4058   ctype = TREE_TYPE (cbase);
4059
4060   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4061     {
4062       /* We do not have a precision to express the values of use.  */
4063       return infinite_cost;
4064     }
4065
4066   if (address_p)
4067     {
4068       /* Do not try to express address of an object with computation based
4069          on address of a different object.  This may cause problems in rtl
4070          level alias analysis (that does not expect this to be happening,
4071          as this is illegal in C), and would be unlikely to be useful
4072          anyway.  */
4073       if (use->iv->base_object
4074           && cand->iv->base_object
4075           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4076         return infinite_cost;
4077     }
4078
4079   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4080     {
4081       /* TODO -- add direct handling of this case.  */
4082       goto fallback;
4083     }
4084
4085   /* CSTEPI is removed from the offset in case statement is after the
4086      increment.  If the step is not constant, we use zero instead.
4087      This is a bit imprecise (there is the extra addition), but
4088      redundancy elimination is likely to transform the code so that
4089      it uses value of the variable before increment anyway,
4090      so it is not that much unrealistic.  */
4091   if (cst_and_fits_in_hwi (cstep))
4092     cstepi = int_cst_value (cstep);
4093   else
4094     cstepi = 0;
4095
4096   if (!constant_multiple_of (ustep, cstep, &rat))
4097     return infinite_cost;
4098
4099   if (double_int_fits_in_shwi_p (rat))
4100     ratio = double_int_to_shwi (rat);
4101   else
4102     return infinite_cost;
4103
4104   STRIP_NOPS (cbase);
4105   ctype = TREE_TYPE (cbase);
4106
4107   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4108
4109   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4110      or ratio == 1, it is better to handle this like
4111
4112      ubase - ratio * cbase + ratio * var
4113
4114      (also holds in the case ratio == -1, TODO.  */
4115
4116   if (cst_and_fits_in_hwi (cbase))
4117     {
4118       offset = - ratio * int_cst_value (cbase);
4119       cost = difference_cost (data,
4120                               ubase, build_int_cst (utype, 0),
4121                               &symbol_present, &var_present, &offset,
4122                               depends_on);
4123       cost.cost /= avg_loop_niter (data->current_loop);
4124     }
4125   else if (ratio == 1)
4126     {
4127       tree real_cbase = cbase;
4128
4129       /* Check to see if any adjustment is needed.  */
4130       if (cstepi == 0 && stmt_is_after_inc)
4131         {
4132           aff_tree real_cbase_aff;
4133           aff_tree cstep_aff;
4134
4135           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4136                                    &real_cbase_aff);
4137           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4138
4139           aff_combination_add (&real_cbase_aff, &cstep_aff);
4140           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4141         }
4142
4143       cost = difference_cost (data,
4144                               ubase, real_cbase,
4145                               &symbol_present, &var_present, &offset,
4146                               depends_on);
4147       cost.cost /= avg_loop_niter (data->current_loop);
4148     }
4149   else if (address_p
4150            && !POINTER_TYPE_P (ctype)
4151            && multiplier_allowed_in_address_p
4152                 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4153                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4154     {
4155       cbase
4156         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4157       cost = difference_cost (data,
4158                               ubase, cbase,
4159                               &symbol_present, &var_present, &offset,
4160                               depends_on);
4161       cost.cost /= avg_loop_niter (data->current_loop);
4162     }
4163   else
4164     {
4165       cost = force_var_cost (data, cbase, depends_on);
4166       cost = add_costs (cost,
4167                         difference_cost (data,
4168                                          ubase, build_int_cst (utype, 0),
4169                                          &symbol_present, &var_present,
4170                                          &offset, depends_on));
4171       cost.cost /= avg_loop_niter (data->current_loop);
4172       cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4173     }
4174
4175   if (inv_expr_id)
4176     {
4177       *inv_expr_id =
4178           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4179       /* Clear depends on.  */
4180       if (*inv_expr_id != -1 && depends_on && *depends_on)
4181         bitmap_clear (*depends_on);
4182     }
4183
4184   /* If we are after the increment, the value of the candidate is higher by
4185      one iteration.  */
4186   if (stmt_is_after_inc)
4187     offset -= ratio * cstepi;
4188
4189   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4190      (symbol/var1/const parts may be omitted).  If we are looking for an
4191      address, find the cost of addressing this.  */
4192   if (address_p)
4193     return add_costs (cost,
4194                       get_address_cost (symbol_present, var_present,
4195                                         offset, ratio, cstepi,
4196                                         TYPE_MODE (TREE_TYPE (utype)),
4197                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4198                                         speed, stmt_is_after_inc,
4199                                         can_autoinc));
4200
4201   /* Otherwise estimate the costs for computing the expression.  */
4202   if (!symbol_present && !var_present && !offset)
4203     {
4204       if (ratio != 1)
4205         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4206       return cost;
4207     }
4208
4209   /* Symbol + offset should be compile-time computable so consider that they
4210       are added once to the variable, if present.  */
4211   if (var_present && (symbol_present || offset))
4212     cost.cost += adjust_setup_cost (data,
4213                                     add_cost (TYPE_MODE (ctype), speed));
4214
4215   /* Having offset does not affect runtime cost in case it is added to
4216      symbol, but it increases complexity.  */
4217   if (offset)
4218     cost.complexity++;
4219
4220   cost.cost += add_cost (TYPE_MODE (ctype), speed);
4221
4222   aratio = ratio > 0 ? ratio : -ratio;
4223   if (aratio != 1)
4224     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4225   return cost;
4226
4227 fallback:
4228   if (can_autoinc)
4229     *can_autoinc = false;
4230
4231   {
4232     /* Just get the expression, expand it and measure the cost.  */
4233     tree comp = get_computation_at (data->current_loop, use, cand, at);
4234
4235     if (!comp)
4236       return infinite_cost;
4237
4238     if (address_p)
4239       comp = build_simple_mem_ref (comp);
4240
4241     return new_cost (computation_cost (comp, speed), 0);
4242   }
4243 }
4244
4245 /* Determines the cost of the computation by that USE is expressed
4246    from induction variable CAND.  If ADDRESS_P is true, we just need
4247    to create an address from it, otherwise we want to get it into
4248    register.  A set of invariants we depend on is stored in
4249    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4250    autoinc addressing is likely.  */
4251
4252 static comp_cost
4253 get_computation_cost (struct ivopts_data *data,
4254                       struct iv_use *use, struct iv_cand *cand,
4255                       bool address_p, bitmap *depends_on,
4256                       bool *can_autoinc, int *inv_expr_id)
4257 {
4258   return get_computation_cost_at (data,
4259                                   use, cand, address_p, depends_on, use->stmt,
4260                                   can_autoinc, inv_expr_id);
4261 }
4262
4263 /* Determines cost of basing replacement of USE on CAND in a generic
4264    expression.  */
4265
4266 static bool
4267 determine_use_iv_cost_generic (struct ivopts_data *data,
4268                                struct iv_use *use, struct iv_cand *cand)
4269 {
4270   bitmap depends_on;
4271   comp_cost cost;
4272   int inv_expr_id = -1;
4273
4274   /* The simple case first -- if we need to express value of the preserved
4275      original biv, the cost is 0.  This also prevents us from counting the
4276      cost of increment twice -- once at this use and once in the cost of
4277      the candidate.  */
4278   if (cand->pos == IP_ORIGINAL
4279       && cand->incremented_at == use->stmt)
4280     {
4281       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4282       return true;
4283     }
4284
4285   cost = get_computation_cost (data, use, cand, false, &depends_on,
4286                                NULL, &inv_expr_id);
4287
4288   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4289                    inv_expr_id);
4290
4291   return !infinite_cost_p (cost);
4292 }
4293
4294 /* Determines cost of basing replacement of USE on CAND in an address.  */
4295
4296 static bool
4297 determine_use_iv_cost_address (struct ivopts_data *data,
4298                                struct iv_use *use, struct iv_cand *cand)
4299 {
4300   bitmap depends_on;
4301   bool can_autoinc;
4302   int inv_expr_id = -1;
4303   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4304                                          &can_autoinc, &inv_expr_id);
4305
4306   if (cand->ainc_use == use)
4307     {
4308       if (can_autoinc)
4309         cost.cost -= cand->cost_step;
4310       /* If we generated the candidate solely for exploiting autoincrement
4311          opportunities, and it turns out it can't be used, set the cost to
4312          infinity to make sure we ignore it.  */
4313       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4314         cost = infinite_cost;
4315     }
4316   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4317                    inv_expr_id);
4318
4319   return !infinite_cost_p (cost);
4320 }
4321
4322 /* Computes value of candidate CAND at position AT in iteration NITER, and
4323    stores it to VAL.  */
4324
4325 static void
4326 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4327                aff_tree *val)
4328 {
4329   aff_tree step, delta, nit;
4330   struct iv *iv = cand->iv;
4331   tree type = TREE_TYPE (iv->base);
4332   tree steptype = type;
4333   if (POINTER_TYPE_P (type))
4334     steptype = sizetype;
4335
4336   tree_to_aff_combination (iv->step, steptype, &step);
4337   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4338   aff_combination_convert (&nit, steptype);
4339   aff_combination_mult (&nit, &step, &delta);
4340   if (stmt_after_increment (loop, cand, at))
4341     aff_combination_add (&delta, &step);
4342
4343   tree_to_aff_combination (iv->base, type, val);
4344   aff_combination_add (val, &delta);
4345 }
4346
4347 /* Returns period of induction variable iv.  */
4348
4349 static tree
4350 iv_period (struct iv *iv)
4351 {
4352   tree step = iv->step, period, type;
4353   tree pow2div;
4354
4355   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4356
4357   type = unsigned_type_for (TREE_TYPE (step));
4358   /* Period of the iv is lcm (step, type_range)/step -1,
4359      i.e., N*type_range/step - 1. Since type range is power
4360      of two, N == (step >> num_of_ending_zeros_binary (step),
4361      so the final result is
4362
4363        (type_range >> num_of_ending_zeros_binary (step)) - 1
4364
4365   */
4366   pow2div = num_ending_zeros (step);
4367
4368   period = build_low_bits_mask (type,
4369                                 (TYPE_PRECISION (type)
4370                                  - tree_low_cst (pow2div, 1)));
4371
4372   return period;
4373 }
4374
4375 /* Returns the comparison operator used when eliminating the iv USE.  */
4376
4377 static enum tree_code
4378 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4379 {
4380   struct loop *loop = data->current_loop;
4381   basic_block ex_bb;
4382   edge exit;
4383
4384   ex_bb = gimple_bb (use->stmt);
4385   exit = EDGE_SUCC (ex_bb, 0);
4386   if (flow_bb_inside_loop_p (loop, exit->dest))
4387     exit = EDGE_SUCC (ex_bb, 1);
4388
4389   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4390 }
4391
4392 /* Check whether it is possible to express the condition in USE by comparison
4393    of candidate CAND.  If so, store the value compared with to BOUND.  */
4394
4395 static bool
4396 may_eliminate_iv (struct ivopts_data *data,
4397                   struct iv_use *use, struct iv_cand *cand, tree *bound)
4398 {
4399   basic_block ex_bb;
4400   edge exit;
4401   tree nit, period;
4402   struct loop *loop = data->current_loop;
4403   aff_tree bnd;
4404   struct tree_niter_desc *desc = NULL;
4405
4406   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4407     return false;
4408
4409   /* For now works only for exits that dominate the loop latch.
4410      TODO: extend to other conditions inside loop body.  */
4411   ex_bb = gimple_bb (use->stmt);
4412   if (use->stmt != last_stmt (ex_bb)
4413       || gimple_code (use->stmt) != GIMPLE_COND
4414       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4415     return false;
4416
4417   exit = EDGE_SUCC (ex_bb, 0);
4418   if (flow_bb_inside_loop_p (loop, exit->dest))
4419     exit = EDGE_SUCC (ex_bb, 1);
4420   if (flow_bb_inside_loop_p (loop, exit->dest))
4421     return false;
4422
4423   nit = niter_for_exit (data, exit, &desc);
4424   if (!nit)
4425     return false;
4426
4427   /* Determine whether we can use the variable to test the exit condition.
4428      This is the case iff the period of the induction variable is greater
4429      than the number of iterations for which the exit condition is true.  */
4430   period = iv_period (cand->iv);
4431
4432   /* If the number of iterations is constant, compare against it directly.  */
4433   if (TREE_CODE (nit) == INTEGER_CST)
4434     {
4435       /* See cand_value_at.  */
4436       if (stmt_after_increment (loop, cand, use->stmt))
4437         {
4438           if (!tree_int_cst_lt (nit, period))
4439             return false;
4440         }
4441       else
4442         {
4443           if (tree_int_cst_lt (period, nit))
4444             return false;
4445         }
4446     }
4447
4448   /* If not, and if this is the only possible exit of the loop, see whether
4449      we can get a conservative estimate on the number of iterations of the
4450      entire loop and compare against that instead.  */
4451   else
4452     {
4453       double_int period_value, max_niter;
4454
4455       max_niter = desc->max;
4456       if (stmt_after_increment (loop, cand, use->stmt))
4457         max_niter = double_int_add (max_niter, double_int_one);
4458       period_value = tree_to_double_int (period);
4459       if (double_int_ucmp (max_niter, period_value) > 0)
4460         {
4461           /* See if we can take advantage of infered loop bound information.  */
4462           if (loop_only_exit_p (loop, exit))
4463             {
4464               if (!estimated_loop_iterations (loop, true, &max_niter))
4465                 return false;
4466               /* The loop bound is already adjusted by adding 1.  */
4467               if (double_int_ucmp (max_niter, period_value) > 0)
4468                 return false;
4469             }
4470           else
4471             return false;
4472         }
4473     }
4474
4475   cand_value_at (loop, cand, use->stmt, nit, &bnd);
4476
4477   *bound = aff_combination_to_tree (&bnd);
4478   /* It is unlikely that computing the number of iterations using division
4479      would be more profitable than keeping the original induction variable.  */
4480   if (expression_expensive_p (*bound))
4481     return false;
4482   return true;
4483 }
4484
4485  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4486     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4487
4488 static int
4489 parm_decl_cost (struct ivopts_data *data, tree bound)
4490 {
4491   tree sbound = bound;
4492   STRIP_NOPS (sbound);
4493
4494   if (TREE_CODE (sbound) == SSA_NAME
4495       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4496       && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
4497       && data->body_includes_call)
4498     return COSTS_N_INSNS (1);
4499
4500   return 0;
4501 }
4502
4503 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4504
4505 static bool
4506 determine_use_iv_cost_condition (struct ivopts_data *data,
4507                                  struct iv_use *use, struct iv_cand *cand)
4508 {
4509   tree bound = NULL_TREE;
4510   struct iv *cmp_iv;
4511   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4512   comp_cost elim_cost, express_cost, cost, bound_cost;
4513   bool ok;
4514   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4515   tree *control_var, *bound_cst;
4516
4517   /* Only consider real candidates.  */
4518   if (!cand->iv)
4519     {
4520       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4521       return false;
4522     }
4523
4524   /* Try iv elimination.  */
4525   if (may_eliminate_iv (data, use, cand, &bound))
4526     {
4527       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4528       if (elim_cost.cost == 0)
4529         elim_cost.cost = parm_decl_cost (data, bound);
4530       else if (TREE_CODE (bound) == INTEGER_CST)
4531         elim_cost.cost = 0;
4532       /* If we replace a loop condition 'i < n' with 'p < base + n',
4533          depends_on_elim will have 'base' and 'n' set, which implies
4534          that both 'base' and 'n' will be live during the loop.  More likely,
4535          'base + n' will be loop invariant, resulting in only one live value
4536          during the loop.  So in that case we clear depends_on_elim and set
4537         elim_inv_expr_id instead.  */
4538       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4539         {
4540           elim_inv_expr_id = get_expr_id (data, bound);
4541           bitmap_clear (depends_on_elim);
4542         }
4543       /* The bound is a loop invariant, so it will be only computed
4544          once.  */
4545       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4546     }
4547   else
4548     elim_cost = infinite_cost;
4549
4550   /* Try expressing the original giv.  If it is compared with an invariant,
4551      note that we cannot get rid of it.  */
4552   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4553                               NULL, &cmp_iv);
4554   gcc_assert (ok);
4555
4556   /* When the condition is a comparison of the candidate IV against
4557      zero, prefer this IV.
4558
4559      TODO: The constant that we're substracting from the cost should
4560      be target-dependent.  This information should be added to the
4561      target costs for each backend.  */
4562   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4563       && integer_zerop (*bound_cst)
4564       && (operand_equal_p (*control_var, cand->var_after, 0)
4565           || operand_equal_p (*control_var, cand->var_before, 0)))
4566     elim_cost.cost -= 1;
4567
4568   express_cost = get_computation_cost (data, use, cand, false,
4569                                        &depends_on_express, NULL,
4570                                        &express_inv_expr_id);
4571   fd_ivopts_data = data;
4572   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4573
4574   /* Count the cost of the original bound as well.  */
4575   bound_cost = force_var_cost (data, *bound_cst, NULL);
4576   if (bound_cost.cost == 0)
4577     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4578   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4579     bound_cost.cost = 0;
4580   express_cost.cost += bound_cost.cost;
4581
4582   /* Choose the better approach, preferring the eliminated IV. */
4583   if (compare_costs (elim_cost, express_cost) <= 0)
4584     {
4585       cost = elim_cost;
4586       depends_on = depends_on_elim;
4587       depends_on_elim = NULL;
4588       inv_expr_id = elim_inv_expr_id;
4589     }
4590   else
4591     {
4592       cost = express_cost;
4593       depends_on = depends_on_express;
4594       depends_on_express = NULL;
4595       bound = NULL_TREE;
4596       inv_expr_id = express_inv_expr_id;
4597     }
4598
4599   set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4600
4601   if (depends_on_elim)
4602     BITMAP_FREE (depends_on_elim);
4603   if (depends_on_express)
4604     BITMAP_FREE (depends_on_express);
4605
4606   return !infinite_cost_p (cost);
4607 }
4608
4609 /* Determines cost of basing replacement of USE on CAND.  Returns false
4610    if USE cannot be based on CAND.  */
4611
4612 static bool
4613 determine_use_iv_cost (struct ivopts_data *data,
4614                        struct iv_use *use, struct iv_cand *cand)
4615 {
4616   switch (use->type)
4617     {
4618     case USE_NONLINEAR_EXPR:
4619       return determine_use_iv_cost_generic (data, use, cand);
4620
4621     case USE_ADDRESS:
4622       return determine_use_iv_cost_address (data, use, cand);
4623
4624     case USE_COMPARE:
4625       return determine_use_iv_cost_condition (data, use, cand);
4626
4627     default:
4628       gcc_unreachable ();
4629     }
4630 }
4631
4632 /* Return true if get_computation_cost indicates that autoincrement is
4633    a possibility for the pair of USE and CAND, false otherwise.  */
4634
4635 static bool
4636 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4637                            struct iv_cand *cand)
4638 {
4639   bitmap depends_on;
4640   bool can_autoinc;
4641   comp_cost cost;
4642
4643   if (use->type != USE_ADDRESS)
4644     return false;
4645
4646   cost = get_computation_cost (data, use, cand, true, &depends_on,
4647                                &can_autoinc, NULL);
4648
4649   BITMAP_FREE (depends_on);
4650
4651   return !infinite_cost_p (cost) && can_autoinc;
4652 }
4653
4654 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4655    use that allows autoincrement, and set their AINC_USE if possible.  */
4656
4657 static void
4658 set_autoinc_for_original_candidates (struct ivopts_data *data)
4659 {
4660   unsigned i, j;
4661
4662   for (i = 0; i < n_iv_cands (data); i++)
4663     {
4664       struct iv_cand *cand = iv_cand (data, i);
4665       struct iv_use *closest = NULL;
4666       if (cand->pos != IP_ORIGINAL)
4667         continue;
4668       for (j = 0; j < n_iv_uses (data); j++)
4669         {
4670           struct iv_use *use = iv_use (data, j);
4671           unsigned uid = gimple_uid (use->stmt);
4672           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4673               || uid > gimple_uid (cand->incremented_at))
4674             continue;
4675           if (closest == NULL || uid > gimple_uid (closest->stmt))
4676             closest = use;
4677         }
4678       if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4679         continue;
4680       cand->ainc_use = closest;
4681     }
4682 }
4683
4684 /* Finds the candidates for the induction variables.  */
4685
4686 static void
4687 find_iv_candidates (struct ivopts_data *data)
4688 {
4689   /* Add commonly used ivs.  */
4690   add_standard_iv_candidates (data);
4691
4692   /* Add old induction variables.  */
4693   add_old_ivs_candidates (data);
4694
4695   /* Add induction variables derived from uses.  */
4696   add_derived_ivs_candidates (data);
4697
4698   set_autoinc_for_original_candidates (data);
4699
4700   /* Record the important candidates.  */
4701   record_important_candidates (data);
4702 }
4703
4704 /* Determines costs of basing the use of the iv on an iv candidate.  */
4705
4706 static void
4707 determine_use_iv_costs (struct ivopts_data *data)
4708 {
4709   unsigned i, j;
4710   struct iv_use *use;
4711   struct iv_cand *cand;
4712   bitmap to_clear = BITMAP_ALLOC (NULL);
4713
4714   alloc_use_cost_map (data);
4715
4716   for (i = 0; i < n_iv_uses (data); i++)
4717     {
4718       use = iv_use (data, i);
4719
4720       if (data->consider_all_candidates)
4721         {
4722           for (j = 0; j < n_iv_cands (data); j++)
4723             {
4724               cand = iv_cand (data, j);
4725               determine_use_iv_cost (data, use, cand);
4726             }
4727         }
4728       else
4729         {
4730           bitmap_iterator bi;
4731
4732           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4733             {
4734               cand = iv_cand (data, j);
4735               if (!determine_use_iv_cost (data, use, cand))
4736                 bitmap_set_bit (to_clear, j);
4737             }
4738
4739           /* Remove the candidates for that the cost is infinite from
4740              the list of related candidates.  */
4741           bitmap_and_compl_into (use->related_cands, to_clear);
4742           bitmap_clear (to_clear);
4743         }
4744     }
4745
4746   BITMAP_FREE (to_clear);
4747
4748   if (dump_file && (dump_flags & TDF_DETAILS))
4749     {
4750       fprintf (dump_file, "Use-candidate costs:\n");
4751
4752       for (i = 0; i < n_iv_uses (data); i++)
4753         {
4754           use = iv_use (data, i);
4755
4756           fprintf (dump_file, "Use %d:\n", i);
4757           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
4758           for (j = 0; j < use->n_map_members; j++)
4759             {
4760               if (!use->cost_map[j].cand
4761                   || infinite_cost_p (use->cost_map[j].cost))
4762                 continue;
4763
4764               fprintf (dump_file, "  %d\t%d\t%d\t",
4765                        use->cost_map[j].cand->id,
4766                        use->cost_map[j].cost.cost,
4767                        use->cost_map[j].cost.complexity);
4768               if (use->cost_map[j].depends_on)
4769                 bitmap_print (dump_file,
4770                               use->cost_map[j].depends_on, "","");
4771               if (use->cost_map[j].inv_expr_id != -1)
4772                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4773               fprintf (dump_file, "\n");
4774             }
4775
4776           fprintf (dump_file, "\n");
4777         }
4778       fprintf (dump_file, "\n");
4779     }
4780 }
4781
4782 /* Determines cost of the candidate CAND.  */
4783
4784 static void
4785 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4786 {
4787   comp_cost cost_base;
4788   unsigned cost, cost_step;
4789   tree base;
4790
4791   if (!cand->iv)
4792     {
4793       cand->cost = 0;
4794       return;
4795     }
4796
4797   /* There are two costs associated with the candidate -- its increment
4798      and its initialization.  The second is almost negligible for any loop
4799      that rolls enough, so we take it just very little into account.  */
4800
4801   base = cand->iv->base;
4802   cost_base = force_var_cost (data, base, NULL);
4803   /* It will be exceptional that the iv register happens to be initialized with
4804      the proper value at no cost.  In general, there will at least be a regcopy
4805      or a const set.  */
4806   if (cost_base.cost == 0)
4807     cost_base.cost = COSTS_N_INSNS (1);
4808   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4809
4810   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4811
4812   /* Prefer the original ivs unless we may gain something by replacing it.
4813      The reason is to make debugging simpler; so this is not relevant for
4814      artificial ivs created by other optimization passes.  */
4815   if (cand->pos != IP_ORIGINAL
4816       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4817     cost++;
4818
4819   /* Prefer not to insert statements into latch unless there are some
4820      already (so that we do not create unnecessary jumps).  */
4821   if (cand->pos == IP_END
4822       && empty_block_p (ip_end_pos (data->current_loop)))
4823     cost++;
4824
4825   cand->cost = cost;
4826   cand->cost_step = cost_step;
4827 }
4828
4829 /* Determines costs of computation of the candidates.  */
4830
4831 static void
4832 determine_iv_costs (struct ivopts_data *data)
4833 {
4834   unsigned i;
4835
4836   if (dump_file && (dump_flags & TDF_DETAILS))
4837     {
4838       fprintf (dump_file, "Candidate costs:\n");
4839       fprintf (dump_file, "  cand\tcost\n");
4840     }
4841
4842   for (i = 0; i < n_iv_cands (data); i++)
4843     {
4844       struct iv_cand *cand = iv_cand (data, i);
4845
4846       determine_iv_cost (data, cand);
4847
4848       if (dump_file && (dump_flags & TDF_DETAILS))
4849         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4850     }
4851
4852   if (dump_file && (dump_flags & TDF_DETAILS))
4853     fprintf (dump_file, "\n");
4854 }
4855
4856 /* Calculates cost for having SIZE induction variables.  */
4857
4858 static unsigned
4859 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4860 {
4861   /* We add size to the cost, so that we prefer eliminating ivs
4862      if possible.  */
4863   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4864                                             data->body_includes_call);
4865 }
4866
4867 /* For each size of the induction variable set determine the penalty.  */
4868
4869 static void
4870 determine_set_costs (struct ivopts_data *data)
4871 {
4872   unsigned j, n;
4873   gimple phi;
4874   gimple_stmt_iterator psi;
4875   tree op;
4876   struct loop *loop = data->current_loop;
4877   bitmap_iterator bi;
4878
4879   if (dump_file && (dump_flags & TDF_DETAILS))
4880     {
4881       fprintf (dump_file, "Global costs:\n");
4882       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4883       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
4884       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
4885       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
4886     }
4887
4888   n = 0;
4889   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4890     {
4891       phi = gsi_stmt (psi);
4892       op = PHI_RESULT (phi);
4893
4894       if (!is_gimple_reg (op))
4895         continue;
4896
4897       if (get_iv (data, op))
4898         continue;
4899
4900       n++;
4901     }
4902
4903   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4904     {
4905       struct version_info *info = ver_info (data, j);
4906
4907       if (info->inv_id && info->has_nonlin_use)
4908         n++;
4909     }
4910
4911   data->regs_used = n;
4912   if (dump_file && (dump_flags & TDF_DETAILS))
4913     fprintf (dump_file, "  regs_used %d\n", n);
4914
4915   if (dump_file && (dump_flags & TDF_DETAILS))
4916     {
4917       fprintf (dump_file, "  cost for size:\n");
4918       fprintf (dump_file, "  ivs\tcost\n");
4919       for (j = 0; j <= 2 * target_avail_regs; j++)
4920         fprintf (dump_file, "  %d\t%d\n", j,
4921                  ivopts_global_cost_for_size (data, j));
4922       fprintf (dump_file, "\n");
4923     }
4924 }
4925
4926 /* Returns true if A is a cheaper cost pair than B.  */
4927
4928 static bool
4929 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4930 {
4931   int cmp;
4932
4933   if (!a)
4934     return false;
4935
4936   if (!b)
4937     return true;
4938
4939   cmp = compare_costs (a->cost, b->cost);
4940   if (cmp < 0)
4941     return true;
4942
4943   if (cmp > 0)
4944     return false;
4945
4946   /* In case the costs are the same, prefer the cheaper candidate.  */
4947   if (a->cand->cost < b->cand->cost)
4948     return true;
4949
4950   return false;
4951 }
4952
4953
4954 /* Returns candidate by that USE is expressed in IVS.  */
4955
4956 static struct cost_pair *
4957 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4958 {
4959   return ivs->cand_for_use[use->id];
4960 }
4961
4962 /* Computes the cost field of IVS structure.  */
4963
4964 static void
4965 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4966 {
4967   comp_cost cost = ivs->cand_use_cost;
4968
4969   cost.cost += ivs->cand_cost;
4970
4971   cost.cost += ivopts_global_cost_for_size (data,
4972                                             ivs->n_regs + ivs->num_used_inv_expr);
4973
4974   ivs->cost = cost;
4975 }
4976
4977 /* Remove invariants in set INVS to set IVS.  */
4978
4979 static void
4980 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4981 {
4982   bitmap_iterator bi;
4983   unsigned iid;
4984
4985   if (!invs)
4986     return;
4987
4988   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4989     {
4990       ivs->n_invariant_uses[iid]--;
4991       if (ivs->n_invariant_uses[iid] == 0)
4992         ivs->n_regs--;
4993     }
4994 }
4995
4996 /* Set USE not to be expressed by any candidate in IVS.  */
4997
4998 static void
4999 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5000                  struct iv_use *use)
5001 {
5002   unsigned uid = use->id, cid;
5003   struct cost_pair *cp;
5004
5005   cp = ivs->cand_for_use[uid];
5006   if (!cp)
5007     return;
5008   cid = cp->cand->id;
5009
5010   ivs->bad_uses++;
5011   ivs->cand_for_use[uid] = NULL;
5012   ivs->n_cand_uses[cid]--;
5013
5014   if (ivs->n_cand_uses[cid] == 0)
5015     {
5016       bitmap_clear_bit (ivs->cands, cid);
5017       /* Do not count the pseudocandidates.  */
5018       if (cp->cand->iv)
5019         ivs->n_regs--;
5020       ivs->n_cands--;
5021       ivs->cand_cost -= cp->cand->cost;
5022
5023       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5024     }
5025
5026   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5027
5028   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5029
5030   if (cp->inv_expr_id != -1)
5031     {
5032       ivs->used_inv_expr[cp->inv_expr_id]--;
5033       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5034         ivs->num_used_inv_expr--;
5035     }
5036   iv_ca_recount_cost (data, ivs);
5037 }
5038
5039 /* Add invariants in set INVS to set IVS.  */
5040
5041 static void
5042 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5043 {
5044   bitmap_iterator bi;
5045   unsigned iid;
5046
5047   if (!invs)
5048     return;
5049
5050   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5051     {
5052       ivs->n_invariant_uses[iid]++;
5053       if (ivs->n_invariant_uses[iid] == 1)
5054         ivs->n_regs++;
5055     }
5056 }
5057
5058 /* Set cost pair for USE in set IVS to CP.  */
5059
5060 static void
5061 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5062               struct iv_use *use, struct cost_pair *cp)
5063 {
5064   unsigned uid = use->id, cid;
5065
5066   if (ivs->cand_for_use[uid] == cp)
5067     return;
5068
5069   if (ivs->cand_for_use[uid])
5070     iv_ca_set_no_cp (data, ivs, use);
5071
5072   if (cp)
5073     {
5074       cid = cp->cand->id;
5075
5076       ivs->bad_uses--;
5077       ivs->cand_for_use[uid] = cp;
5078       ivs->n_cand_uses[cid]++;
5079       if (ivs->n_cand_uses[cid] == 1)
5080         {
5081           bitmap_set_bit (ivs->cands, cid);
5082           /* Do not count the pseudocandidates.  */
5083           if (cp->cand->iv)
5084             ivs->n_regs++;
5085           ivs->n_cands++;
5086           ivs->cand_cost += cp->cand->cost;
5087
5088           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5089         }
5090
5091       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5092       iv_ca_set_add_invariants (ivs, cp->depends_on);
5093
5094       if (cp->inv_expr_id != -1)
5095         {
5096           ivs->used_inv_expr[cp->inv_expr_id]++;
5097           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5098             ivs->num_used_inv_expr++;
5099         }
5100       iv_ca_recount_cost (data, ivs);
5101     }
5102 }
5103
5104 /* Extend set IVS by expressing USE by some of the candidates in it
5105    if possible. All important candidates will be considered
5106    if IMPORTANT_CANDIDATES is true.  */
5107
5108 static void
5109 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5110                struct iv_use *use, bool important_candidates)
5111 {
5112   struct cost_pair *best_cp = NULL, *cp;
5113   bitmap_iterator bi;
5114   bitmap cands;
5115   unsigned i;
5116
5117   gcc_assert (ivs->upto >= use->id);
5118
5119   if (ivs->upto == use->id)
5120     {
5121       ivs->upto++;
5122       ivs->bad_uses++;
5123     }
5124
5125   cands = (important_candidates ? data->important_candidates : ivs->cands);
5126   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5127     {
5128       struct iv_cand *cand = iv_cand (data, i);
5129
5130       cp = get_use_iv_cost (data, use, cand);
5131
5132       if (cheaper_cost_pair (cp, best_cp))
5133         best_cp = cp;
5134     }
5135
5136   iv_ca_set_cp (data, ivs, use, best_cp);
5137 }
5138
5139 /* Get cost for assignment IVS.  */
5140
5141 static comp_cost
5142 iv_ca_cost (struct iv_ca *ivs)
5143 {
5144   /* This was a conditional expression but it triggered a bug in
5145      Sun C 5.5.  */
5146   if (ivs->bad_uses)
5147     return infinite_cost;
5148   else
5149     return ivs->cost;
5150 }
5151
5152 /* Returns true if all dependences of CP are among invariants in IVS.  */
5153
5154 static bool
5155 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5156 {
5157   unsigned i;
5158   bitmap_iterator bi;
5159
5160   if (!cp->depends_on)
5161     return true;
5162
5163   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5164     {
5165       if (ivs->n_invariant_uses[i] == 0)
5166         return false;
5167     }
5168
5169   return true;
5170 }
5171
5172 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5173    it before NEXT_CHANGE.  */
5174
5175 static struct iv_ca_delta *
5176 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5177                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5178 {
5179   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5180
5181   change->use = use;
5182   change->old_cp = old_cp;
5183   change->new_cp = new_cp;
5184   change->next_change = next_change;
5185
5186   return change;
5187 }
5188
5189 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5190    are rewritten.  */
5191
5192 static struct iv_ca_delta *
5193 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5194 {
5195   struct iv_ca_delta *last;
5196
5197   if (!l2)
5198     return l1;
5199
5200   if (!l1)
5201     return l2;
5202
5203   for (last = l1; last->next_change; last = last->next_change)
5204     continue;
5205   last->next_change = l2;
5206
5207   return l1;
5208 }
5209
5210 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5211
5212 static struct iv_ca_delta *
5213 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5214 {
5215   struct iv_ca_delta *act, *next, *prev = NULL;
5216   struct cost_pair *tmp;
5217
5218   for (act = delta; act; act = next)
5219     {
5220       next = act->next_change;
5221       act->next_change = prev;
5222       prev = act;
5223
5224       tmp = act->old_cp;
5225       act->old_cp = act->new_cp;
5226       act->new_cp = tmp;
5227     }
5228
5229   return prev;
5230 }
5231
5232 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5233    reverted instead.  */
5234
5235 static void
5236 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5237                     struct iv_ca_delta *delta, bool forward)
5238 {
5239   struct cost_pair *from, *to;
5240   struct iv_ca_delta *act;
5241
5242   if (!forward)
5243     delta = iv_ca_delta_reverse (delta);
5244
5245   for (act = delta; act; act = act->next_change)
5246     {
5247       from = act->old_cp;
5248       to = act->new_cp;
5249       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5250       iv_ca_set_cp (data, ivs, act->use, to);
5251     }
5252
5253   if (!forward)
5254     iv_ca_delta_reverse (delta);
5255 }
5256
5257 /* Returns true if CAND is used in IVS.  */
5258
5259 static bool
5260 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5261 {
5262   return ivs->n_cand_uses[cand->id] > 0;
5263 }
5264
5265 /* Returns number of induction variable candidates in the set IVS.  */
5266
5267 static unsigned
5268 iv_ca_n_cands (struct iv_ca *ivs)
5269 {
5270   return ivs->n_cands;
5271 }
5272
5273 /* Free the list of changes DELTA.  */
5274
5275 static void
5276 iv_ca_delta_free (struct iv_ca_delta **delta)
5277 {
5278   struct iv_ca_delta *act, *next;
5279
5280   for (act = *delta; act; act = next)
5281     {
5282       next = act->next_change;
5283       free (act);
5284     }
5285
5286   *delta = NULL;
5287 }
5288
5289 /* Allocates new iv candidates assignment.  */
5290
5291 static struct iv_ca *
5292 iv_ca_new (struct ivopts_data *data)
5293 {
5294   struct iv_ca *nw = XNEW (struct iv_ca);
5295
5296   nw->upto = 0;
5297   nw->bad_uses = 0;
5298   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5299   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5300   nw->cands = BITMAP_ALLOC (NULL);
5301   nw->n_cands = 0;
5302   nw->n_regs = 0;
5303   nw->cand_use_cost = zero_cost;
5304   nw->cand_cost = 0;
5305   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5306   nw->cost = zero_cost;
5307   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5308   nw->num_used_inv_expr = 0;
5309
5310   return nw;
5311 }
5312
5313 /* Free memory occupied by the set IVS.  */
5314
5315 static void
5316 iv_ca_free (struct iv_ca **ivs)
5317 {
5318   free ((*ivs)->cand_for_use);
5319   free ((*ivs)->n_cand_uses);
5320   BITMAP_FREE ((*ivs)->cands);
5321   free ((*ivs)->n_invariant_uses);
5322   free ((*ivs)->used_inv_expr);
5323   free (*ivs);
5324   *ivs = NULL;
5325 }
5326
5327 /* Dumps IVS to FILE.  */
5328
5329 static void
5330 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5331 {
5332   const char *pref = "  invariants ";
5333   unsigned i;
5334   comp_cost cost = iv_ca_cost (ivs);
5335
5336   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5337   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5338            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5339   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5340
5341    for (i = 0; i < ivs->upto; i++)
5342     {
5343       struct iv_use *use = iv_use (data, i);
5344       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5345       if (cp)
5346         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5347                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5348       else
5349         fprintf (file, "   use:%d --> ??\n", use->id);
5350     }
5351
5352   for (i = 1; i <= data->max_inv_id; i++)
5353     if (ivs->n_invariant_uses[i])
5354       {
5355         fprintf (file, "%s%d", pref, i);
5356         pref = ", ";
5357       }
5358   fprintf (file, "\n\n");
5359 }
5360
5361 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5362    new set, and store differences in DELTA.  Number of induction variables
5363    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5364    the function will try to find a solution with mimimal iv candidates.  */
5365
5366 static comp_cost
5367 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5368               struct iv_cand *cand, struct iv_ca_delta **delta,
5369               unsigned *n_ivs, bool min_ncand)
5370 {
5371   unsigned i;
5372   comp_cost cost;
5373   struct iv_use *use;
5374   struct cost_pair *old_cp, *new_cp;
5375
5376   *delta = NULL;
5377   for (i = 0; i < ivs->upto; i++)
5378     {
5379       use = iv_use (data, i);
5380       old_cp = iv_ca_cand_for_use (ivs, use);
5381
5382       if (old_cp
5383           && old_cp->cand == cand)
5384         continue;
5385
5386       new_cp = get_use_iv_cost (data, use, cand);
5387       if (!new_cp)
5388         continue;
5389
5390       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5391         continue;
5392
5393       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5394         continue;
5395
5396       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5397     }
5398
5399   iv_ca_delta_commit (data, ivs, *delta, true);
5400   cost = iv_ca_cost (ivs);
5401   if (n_ivs)
5402     *n_ivs = iv_ca_n_cands (ivs);
5403   iv_ca_delta_commit (data, ivs, *delta, false);
5404
5405   return cost;
5406 }
5407
5408 /* Try narrowing set IVS by removing CAND.  Return the cost of
5409    the new set and store the differences in DELTA.  */
5410
5411 static comp_cost
5412 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5413               struct iv_cand *cand, struct iv_ca_delta **delta)
5414 {
5415   unsigned i, ci;
5416   struct iv_use *use;
5417   struct cost_pair *old_cp, *new_cp, *cp;
5418   bitmap_iterator bi;
5419   struct iv_cand *cnd;
5420   comp_cost cost;
5421
5422   *delta = NULL;
5423   for (i = 0; i < n_iv_uses (data); i++)
5424     {
5425       use = iv_use (data, i);
5426
5427       old_cp = iv_ca_cand_for_use (ivs, use);
5428       if (old_cp->cand != cand)
5429         continue;
5430
5431       new_cp = NULL;
5432
5433       if (data->consider_all_candidates)
5434         {
5435           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5436             {
5437               if (ci == cand->id)
5438                 continue;
5439
5440               cnd = iv_cand (data, ci);
5441
5442               cp = get_use_iv_cost (data, use, cnd);
5443               if (!cp)
5444                 continue;
5445
5446               if (!iv_ca_has_deps (ivs, cp))
5447                 continue;
5448
5449               if (!cheaper_cost_pair (cp, new_cp))
5450                 continue;
5451
5452               new_cp = cp;
5453             }
5454         }
5455       else
5456         {
5457           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5458             {
5459               if (ci == cand->id)
5460                 continue;
5461
5462               cnd = iv_cand (data, ci);
5463
5464               cp = get_use_iv_cost (data, use, cnd);
5465               if (!cp)
5466                 continue;
5467               if (!iv_ca_has_deps (ivs, cp))
5468                 continue;
5469
5470               if (!cheaper_cost_pair (cp, new_cp))
5471                 continue;
5472
5473               new_cp = cp;
5474             }
5475         }
5476
5477       if (!new_cp)
5478         {
5479           iv_ca_delta_free (delta);
5480           return infinite_cost;
5481         }
5482
5483       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5484     }
5485
5486   iv_ca_delta_commit (data, ivs, *delta, true);
5487   cost = iv_ca_cost (ivs);
5488   iv_ca_delta_commit (data, ivs, *delta, false);
5489
5490   return cost;
5491 }
5492
5493 /* Try optimizing the set of candidates IVS by removing candidates different
5494    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5495    differences in DELTA.  */
5496
5497 static comp_cost
5498 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5499              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5500 {
5501   bitmap_iterator bi;
5502   struct iv_ca_delta *act_delta, *best_delta;
5503   unsigned i;
5504   comp_cost best_cost, acost;
5505   struct iv_cand *cand;
5506
5507   best_delta = NULL;
5508   best_cost = iv_ca_cost (ivs);
5509
5510   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5511     {
5512       cand = iv_cand (data, i);
5513
5514       if (cand == except_cand)
5515         continue;
5516
5517       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5518
5519       if (compare_costs (acost, best_cost) < 0)
5520         {
5521           best_cost = acost;
5522           iv_ca_delta_free (&best_delta);
5523           best_delta = act_delta;
5524         }
5525       else
5526         iv_ca_delta_free (&act_delta);
5527     }
5528
5529   if (!best_delta)
5530     {
5531       *delta = NULL;
5532       return best_cost;
5533     }
5534
5535   /* Recurse to possibly remove other unnecessary ivs.  */
5536   iv_ca_delta_commit (data, ivs, best_delta, true);
5537   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5538   iv_ca_delta_commit (data, ivs, best_delta, false);
5539   *delta = iv_ca_delta_join (best_delta, *delta);
5540   return best_cost;
5541 }
5542
5543 /* Tries to extend the sets IVS in the best possible way in order
5544    to express the USE.  If ORIGINALP is true, prefer candidates from
5545    the original set of IVs, otherwise favor important candidates not
5546    based on any memory object.  */
5547
5548 static bool
5549 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5550                   struct iv_use *use, bool originalp)
5551 {
5552   comp_cost best_cost, act_cost;
5553   unsigned i;
5554   bitmap_iterator bi;
5555   struct iv_cand *cand;
5556   struct iv_ca_delta *best_delta = NULL, *act_delta;
5557   struct cost_pair *cp;
5558
5559   iv_ca_add_use (data, ivs, use, false);
5560   best_cost = iv_ca_cost (ivs);
5561
5562   cp = iv_ca_cand_for_use (ivs, use);
5563   if (!cp)
5564     {
5565       ivs->upto--;
5566       ivs->bad_uses--;
5567       iv_ca_add_use (data, ivs, use, true);
5568       best_cost = iv_ca_cost (ivs);
5569       cp = iv_ca_cand_for_use (ivs, use);
5570     }
5571   if (cp)
5572     {
5573       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5574       iv_ca_set_no_cp (data, ivs, use);
5575     }
5576
5577   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5578      first try important candidates not based on any memory object.  Only if
5579      this fails, try the specific ones.  Rationale -- in loops with many
5580      variables the best choice often is to use just one generic biv.  If we
5581      added here many ivs specific to the uses, the optimization algorithm later
5582      would be likely to get stuck in a local minimum, thus causing us to create
5583      too many ivs.  The approach from few ivs to more seems more likely to be
5584      successful -- starting from few ivs, replacing an expensive use by a
5585      specific iv should always be a win.  */
5586   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5587     {
5588       cand = iv_cand (data, i);
5589
5590       if (originalp && cand->pos !=IP_ORIGINAL)
5591         continue;
5592
5593       if (!originalp && cand->iv->base_object != NULL_TREE)
5594         continue;
5595
5596       if (iv_ca_cand_used_p (ivs, cand))
5597         continue;
5598
5599       cp = get_use_iv_cost (data, use, cand);
5600       if (!cp)
5601         continue;
5602
5603       iv_ca_set_cp (data, ivs, use, cp);
5604       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5605                                true);
5606       iv_ca_set_no_cp (data, ivs, use);
5607       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5608
5609       if (compare_costs (act_cost, best_cost) < 0)
5610         {
5611           best_cost = act_cost;
5612
5613           iv_ca_delta_free (&best_delta);
5614           best_delta = act_delta;
5615         }
5616       else
5617         iv_ca_delta_free (&act_delta);
5618     }
5619
5620   if (infinite_cost_p (best_cost))
5621     {
5622       for (i = 0; i < use->n_map_members; i++)
5623         {
5624           cp = use->cost_map + i;
5625           cand = cp->cand;
5626           if (!cand)
5627             continue;
5628
5629           /* Already tried this.  */
5630           if (cand->important)
5631             {
5632               if (originalp && cand->pos == IP_ORIGINAL)
5633                 continue;
5634               if (!originalp && cand->iv->base_object == NULL_TREE)
5635                 continue;
5636             }
5637
5638           if (iv_ca_cand_used_p (ivs, cand))
5639             continue;
5640
5641           act_delta = NULL;
5642           iv_ca_set_cp (data, ivs, use, cp);
5643           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5644           iv_ca_set_no_cp (data, ivs, use);
5645           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5646                                        cp, act_delta);
5647
5648           if (compare_costs (act_cost, best_cost) < 0)
5649             {
5650               best_cost = act_cost;
5651
5652               if (best_delta)
5653                 iv_ca_delta_free (&best_delta);
5654               best_delta = act_delta;
5655             }
5656           else
5657             iv_ca_delta_free (&act_delta);
5658         }
5659     }
5660
5661   iv_ca_delta_commit (data, ivs, best_delta, true);
5662   iv_ca_delta_free (&best_delta);
5663
5664   return !infinite_cost_p (best_cost);
5665 }
5666
5667 /* Finds an initial assignment of candidates to uses.  */
5668
5669 static struct iv_ca *
5670 get_initial_solution (struct ivopts_data *data, bool originalp)
5671 {
5672   struct iv_ca *ivs = iv_ca_new (data);
5673   unsigned i;
5674
5675   for (i = 0; i < n_iv_uses (data); i++)
5676     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5677       {
5678         iv_ca_free (&ivs);
5679         return NULL;
5680       }
5681
5682   return ivs;
5683 }
5684
5685 /* Tries to improve set of induction variables IVS.  */
5686
5687 static bool
5688 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5689 {
5690   unsigned i, n_ivs;
5691   comp_cost acost, best_cost = iv_ca_cost (ivs);
5692   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5693   struct iv_cand *cand;
5694
5695   /* Try extending the set of induction variables by one.  */
5696   for (i = 0; i < n_iv_cands (data); i++)
5697     {
5698       cand = iv_cand (data, i);
5699
5700       if (iv_ca_cand_used_p (ivs, cand))
5701         continue;
5702
5703       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5704       if (!act_delta)
5705         continue;
5706
5707       /* If we successfully added the candidate and the set is small enough,
5708          try optimizing it by removing other candidates.  */
5709       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5710         {
5711           iv_ca_delta_commit (data, ivs, act_delta, true);
5712           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5713           iv_ca_delta_commit (data, ivs, act_delta, false);
5714           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5715         }
5716
5717       if (compare_costs (acost, best_cost) < 0)
5718         {
5719           best_cost = acost;
5720           iv_ca_delta_free (&best_delta);
5721           best_delta = act_delta;
5722         }
5723       else
5724         iv_ca_delta_free (&act_delta);
5725     }
5726
5727   if (!best_delta)
5728     {
5729       /* Try removing the candidates from the set instead.  */
5730       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5731
5732       /* Nothing more we can do.  */
5733       if (!best_delta)
5734         return false;
5735     }
5736
5737   iv_ca_delta_commit (data, ivs, best_delta, true);
5738   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5739   iv_ca_delta_free (&best_delta);
5740   return true;
5741 }
5742
5743 /* Attempts to find the optimal set of induction variables.  We do simple
5744    greedy heuristic -- we try to replace at most one candidate in the selected
5745    solution and remove the unused ivs while this improves the cost.  */
5746
5747 static struct iv_ca *
5748 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5749 {
5750   struct iv_ca *set;
5751
5752   /* Get the initial solution.  */
5753   set = get_initial_solution (data, originalp);
5754   if (!set)
5755     {
5756       if (dump_file && (dump_flags & TDF_DETAILS))
5757         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5758       return NULL;
5759     }
5760
5761   if (dump_file && (dump_flags & TDF_DETAILS))
5762     {
5763       fprintf (dump_file, "Initial set of candidates:\n");
5764       iv_ca_dump (data, dump_file, set);
5765     }
5766
5767   while (try_improve_iv_set (data, set))
5768     {
5769       if (dump_file && (dump_flags & TDF_DETAILS))
5770         {
5771           fprintf (dump_file, "Improved to:\n");
5772           iv_ca_dump (data, dump_file, set);
5773         }
5774     }
5775
5776   return set;
5777 }
5778
5779 static struct iv_ca *
5780 find_optimal_iv_set (struct ivopts_data *data)
5781 {
5782   unsigned i;
5783   struct iv_ca *set, *origset;
5784   struct iv_use *use;
5785   comp_cost cost, origcost;
5786
5787   /* Determine the cost based on a strategy that starts with original IVs,
5788      and try again using a strategy that prefers candidates not based
5789      on any IVs.  */
5790   origset = find_optimal_iv_set_1 (data, true);
5791   set = find_optimal_iv_set_1 (data, false);
5792
5793   if (!origset && !set)
5794     return NULL;
5795
5796   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5797   cost = set ? iv_ca_cost (set) : infinite_cost;
5798
5799   if (dump_file && (dump_flags & TDF_DETAILS))
5800     {
5801       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5802                origcost.cost, origcost.complexity);
5803       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5804                cost.cost, cost.complexity);
5805     }
5806
5807   /* Choose the one with the best cost.  */
5808   if (compare_costs (origcost, cost) <= 0)
5809     {
5810       if (set)
5811         iv_ca_free (&set);
5812       set = origset;
5813     }
5814   else if (origset)
5815     iv_ca_free (&origset);
5816
5817   for (i = 0; i < n_iv_uses (data); i++)
5818     {
5819       use = iv_use (data, i);
5820       use->selected = iv_ca_cand_for_use (set, use)->cand;
5821     }
5822
5823   return set;
5824 }
5825
5826 /* Creates a new induction variable corresponding to CAND.  */
5827
5828 static void
5829 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5830 {
5831   gimple_stmt_iterator incr_pos;
5832   tree base;
5833   bool after = false;
5834
5835   if (!cand->iv)
5836     return;
5837
5838   switch (cand->pos)
5839     {
5840     case IP_NORMAL:
5841       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5842       break;
5843
5844     case IP_END:
5845       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5846       after = true;
5847       break;
5848
5849     case IP_AFTER_USE:
5850       after = true;
5851       /* fall through */
5852     case IP_BEFORE_USE:
5853       incr_pos = gsi_for_stmt (cand->incremented_at);
5854       break;
5855
5856     case IP_ORIGINAL:
5857       /* Mark that the iv is preserved.  */
5858       name_info (data, cand->var_before)->preserve_biv = true;
5859       name_info (data, cand->var_after)->preserve_biv = true;
5860
5861       /* Rewrite the increment so that it uses var_before directly.  */
5862       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5863       return;
5864     }
5865
5866   gimple_add_tmp_var (cand->var_before);
5867   add_referenced_var (cand->var_before);
5868
5869   base = unshare_expr (cand->iv->base);
5870
5871   create_iv (base, unshare_expr (cand->iv->step),
5872              cand->var_before, data->current_loop,
5873              &incr_pos, after, &cand->var_before, &cand->var_after);
5874 }
5875
5876 /* Creates new induction variables described in SET.  */
5877
5878 static void
5879 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5880 {
5881   unsigned i;
5882   struct iv_cand *cand;
5883   bitmap_iterator bi;
5884
5885   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5886     {
5887       cand = iv_cand (data, i);
5888       create_new_iv (data, cand);
5889     }
5890
5891   if (dump_file && (dump_flags & TDF_DETAILS))
5892     {
5893       fprintf (dump_file, "\nSelected IV set: \n");
5894       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5895         {
5896           cand = iv_cand (data, i);
5897           dump_cand (dump_file, cand);
5898         }
5899       fprintf (dump_file, "\n");
5900     }
5901 }
5902
5903 /* Rewrites USE (definition of iv used in a nonlinear expression)
5904    using candidate CAND.  */
5905
5906 static void
5907 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5908                             struct iv_use *use, struct iv_cand *cand)
5909 {
5910   tree comp;
5911   tree op, tgt;
5912   gimple ass;
5913   gimple_stmt_iterator bsi;
5914
5915   /* An important special case -- if we are asked to express value of
5916      the original iv by itself, just exit; there is no need to
5917      introduce a new computation (that might also need casting the
5918      variable to unsigned and back).  */
5919   if (cand->pos == IP_ORIGINAL
5920       && cand->incremented_at == use->stmt)
5921     {
5922       tree step, ctype, utype;
5923       enum tree_code incr_code = PLUS_EXPR, old_code;
5924
5925       gcc_assert (is_gimple_assign (use->stmt));
5926       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5927
5928       step = cand->iv->step;
5929       ctype = TREE_TYPE (step);
5930       utype = TREE_TYPE (cand->var_after);
5931       if (TREE_CODE (step) == NEGATE_EXPR)
5932         {
5933           incr_code = MINUS_EXPR;
5934           step = TREE_OPERAND (step, 0);
5935         }
5936
5937       /* Check whether we may leave the computation unchanged.
5938          This is the case only if it does not rely on other
5939          computations in the loop -- otherwise, the computation
5940          we rely upon may be removed in remove_unused_ivs,
5941          thus leading to ICE.  */
5942       old_code = gimple_assign_rhs_code (use->stmt);
5943       if (old_code == PLUS_EXPR
5944           || old_code == MINUS_EXPR
5945           || old_code == POINTER_PLUS_EXPR)
5946         {
5947           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5948             op = gimple_assign_rhs2 (use->stmt);
5949           else if (old_code != MINUS_EXPR
5950                    && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5951             op = gimple_assign_rhs1 (use->stmt);
5952           else
5953             op = NULL_TREE;
5954         }
5955       else
5956         op = NULL_TREE;
5957
5958       if (op
5959           && (TREE_CODE (op) == INTEGER_CST
5960               || operand_equal_p (op, step, 0)))
5961         return;
5962
5963       /* Otherwise, add the necessary computations to express
5964          the iv.  */
5965       op = fold_convert (ctype, cand->var_before);
5966       comp = fold_convert (utype,
5967                            build2 (incr_code, ctype, op,
5968                                    unshare_expr (step)));
5969     }
5970   else
5971     {
5972       comp = get_computation (data->current_loop, use, cand);
5973       gcc_assert (comp != NULL_TREE);
5974     }
5975
5976   switch (gimple_code (use->stmt))
5977     {
5978     case GIMPLE_PHI:
5979       tgt = PHI_RESULT (use->stmt);
5980
5981       /* If we should keep the biv, do not replace it.  */
5982       if (name_info (data, tgt)->preserve_biv)
5983         return;
5984
5985       bsi = gsi_after_labels (gimple_bb (use->stmt));
5986       break;
5987
5988     case GIMPLE_ASSIGN:
5989       tgt = gimple_assign_lhs (use->stmt);
5990       bsi = gsi_for_stmt (use->stmt);
5991       break;
5992
5993     default:
5994       gcc_unreachable ();
5995     }
5996
5997   if (!valid_gimple_rhs_p (comp)
5998       || (gimple_code (use->stmt) != GIMPLE_PHI
5999           /* We can't allow re-allocating the stmt as it might be pointed
6000              to still.  */
6001           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6002               >= gimple_num_ops (gsi_stmt (bsi)))))
6003     {
6004       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6005                                        true, GSI_SAME_STMT);
6006       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6007         {
6008           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6009           /* As this isn't a plain copy we have to reset alignment
6010              information.  */
6011           if (SSA_NAME_PTR_INFO (comp))
6012             {
6013               SSA_NAME_PTR_INFO (comp)->align = 1;
6014               SSA_NAME_PTR_INFO (comp)->misalign = 0;
6015             }
6016         }
6017     }
6018
6019   if (gimple_code (use->stmt) == GIMPLE_PHI)
6020     {
6021       ass = gimple_build_assign (tgt, comp);
6022       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6023
6024       bsi = gsi_for_stmt (use->stmt);
6025       remove_phi_node (&bsi, false);
6026     }
6027   else
6028     {
6029       gimple_assign_set_rhs_from_tree (&bsi, comp);
6030       use->stmt = gsi_stmt (bsi);
6031     }
6032 }
6033
6034 /* Copies the reference information from OLD_REF to NEW_REF.  */
6035
6036 static void
6037 copy_ref_info (tree new_ref, tree old_ref)
6038 {
6039   tree new_ptr_base = NULL_TREE;
6040
6041   TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
6042   TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
6043
6044   new_ptr_base = TREE_OPERAND (new_ref, 0);
6045
6046   /* We can transfer points-to information from an old pointer
6047      or decl base to the new one.  */
6048   if (new_ptr_base
6049       && TREE_CODE (new_ptr_base) == SSA_NAME
6050       && !SSA_NAME_PTR_INFO (new_ptr_base))
6051     {
6052       tree base = get_base_address (old_ref);
6053       if (!base)
6054         ;
6055       else if ((TREE_CODE (base) == MEM_REF
6056                 || TREE_CODE (base) == TARGET_MEM_REF)
6057                && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
6058                && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
6059         {
6060           struct ptr_info_def *new_pi;
6061           duplicate_ssa_name_ptr_info
6062             (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
6063           new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
6064           /* We have to be careful about transfering alignment information.  */
6065           if (TREE_CODE (old_ref) == MEM_REF
6066               && !(TREE_CODE (new_ref) == TARGET_MEM_REF
6067                    && (TMR_INDEX2 (new_ref)
6068                        || (TMR_STEP (new_ref)
6069                            && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
6070                                < new_pi->align)))))
6071             {
6072               new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
6073                                                   mem_ref_offset (new_ref)).low;
6074               new_pi->misalign &= (new_pi->align - 1);
6075             }
6076           else
6077             {
6078               new_pi->align = 1;
6079               new_pi->misalign = 0;
6080             }
6081         }
6082       else if (TREE_CODE (base) == VAR_DECL
6083                || TREE_CODE (base) == PARM_DECL
6084                || TREE_CODE (base) == RESULT_DECL)
6085         {
6086           struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
6087           pt_solution_set_var (&pi->pt, base);
6088         }
6089     }
6090 }
6091
6092 /* Performs a peephole optimization to reorder the iv update statement with
6093    a mem ref to enable instruction combining in later phases. The mem ref uses
6094    the iv value before the update, so the reordering transformation requires
6095    adjustment of the offset. CAND is the selected IV_CAND.
6096
6097    Example:
6098
6099    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6100    iv2 = iv1 + 1;
6101
6102    if (t < val)      (1)
6103      goto L;
6104    goto Head;
6105
6106
6107    directly propagating t over to (1) will introduce overlapping live range
6108    thus increase register pressure. This peephole transform it into:
6109
6110
6111    iv2 = iv1 + 1;
6112    t = MEM_REF (base, iv2, 8, 8);
6113    if (t < val)
6114      goto L;
6115    goto Head;
6116 */
6117
6118 static void
6119 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6120 {
6121   tree var_after;
6122   gimple iv_update, stmt;
6123   basic_block bb;
6124   gimple_stmt_iterator gsi, gsi_iv;
6125
6126   if (cand->pos != IP_NORMAL)
6127     return;
6128
6129   var_after = cand->var_after;
6130   iv_update = SSA_NAME_DEF_STMT (var_after);
6131
6132   bb = gimple_bb (iv_update);
6133   gsi = gsi_last_nondebug_bb (bb);
6134   stmt = gsi_stmt (gsi);
6135
6136   /* Only handle conditional statement for now.  */
6137   if (gimple_code (stmt) != GIMPLE_COND)
6138     return;
6139
6140   gsi_prev_nondebug (&gsi);
6141   stmt = gsi_stmt (gsi);
6142   if (stmt != iv_update)
6143     return;
6144
6145   gsi_prev_nondebug (&gsi);
6146   if (gsi_end_p (gsi))
6147     return;
6148
6149   stmt = gsi_stmt (gsi);
6150   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6151     return;
6152
6153   if (stmt != use->stmt)
6154     return;
6155
6156   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6157     return;
6158
6159   if (dump_file && (dump_flags & TDF_DETAILS))
6160     {
6161       fprintf (dump_file, "Reordering \n");
6162       print_gimple_stmt (dump_file, iv_update, 0, 0);
6163       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6164       fprintf (dump_file, "\n");
6165     }
6166
6167   gsi = gsi_for_stmt (use->stmt);
6168   gsi_iv = gsi_for_stmt (iv_update);
6169   gsi_move_before (&gsi_iv, &gsi);
6170
6171   cand->pos = IP_BEFORE_USE;
6172   cand->incremented_at = use->stmt;
6173 }
6174
6175 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6176
6177 static void
6178 rewrite_use_address (struct ivopts_data *data,
6179                      struct iv_use *use, struct iv_cand *cand)
6180 {
6181   aff_tree aff;
6182   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6183   tree base_hint = NULL_TREE;
6184   tree ref, iv;
6185   bool ok;
6186
6187   adjust_iv_update_pos (cand, use);
6188   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6189   gcc_assert (ok);
6190   unshare_aff_combination (&aff);
6191
6192   /* To avoid undefined overflow problems, all IV candidates use unsigned
6193      integer types.  The drawback is that this makes it impossible for
6194      create_mem_ref to distinguish an IV that is based on a memory object
6195      from one that represents simply an offset.
6196
6197      To work around this problem, we pass a hint to create_mem_ref that
6198      indicates which variable (if any) in aff is an IV based on a memory
6199      object.  Note that we only consider the candidate.  If this is not
6200      based on an object, the base of the reference is in some subexpression
6201      of the use -- but these will use pointer types, so they are recognized
6202      by the create_mem_ref heuristics anyway.  */
6203   if (cand->iv->base_object)
6204     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6205
6206   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6207   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6208                         reference_alias_ptr_type (*use->op_p),
6209                         iv, base_hint, data->speed);
6210   copy_ref_info (ref, *use->op_p);
6211   *use->op_p = ref;
6212 }
6213
6214 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6215    candidate CAND.  */
6216
6217 static void
6218 rewrite_use_compare (struct ivopts_data *data,
6219                      struct iv_use *use, struct iv_cand *cand)
6220 {
6221   tree comp, *var_p, op, bound;
6222   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6223   enum tree_code compare;
6224   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6225   bool ok;
6226
6227   bound = cp->value;
6228   if (bound)
6229     {
6230       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6231       tree var_type = TREE_TYPE (var);
6232       gimple_seq stmts;
6233
6234       if (dump_file && (dump_flags & TDF_DETAILS))
6235         {
6236           fprintf (dump_file, "Replacing exit test: ");
6237           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6238         }
6239       compare = iv_elimination_compare (data, use);
6240       bound = unshare_expr (fold_convert (var_type, bound));
6241       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6242       if (stmts)
6243         gsi_insert_seq_on_edge_immediate (
6244                 loop_preheader_edge (data->current_loop),
6245                 stmts);
6246
6247       gimple_cond_set_lhs (use->stmt, var);
6248       gimple_cond_set_code (use->stmt, compare);
6249       gimple_cond_set_rhs (use->stmt, op);
6250       return;
6251     }
6252
6253   /* The induction variable elimination failed; just express the original
6254      giv.  */
6255   comp = get_computation (data->current_loop, use, cand);
6256   gcc_assert (comp != NULL_TREE);
6257
6258   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6259   gcc_assert (ok);
6260
6261   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6262                                      true, GSI_SAME_STMT);
6263 }
6264
6265 /* Rewrites USE using candidate CAND.  */
6266
6267 static void
6268 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6269 {
6270   switch (use->type)
6271     {
6272       case USE_NONLINEAR_EXPR:
6273         rewrite_use_nonlinear_expr (data, use, cand);
6274         break;
6275
6276       case USE_ADDRESS:
6277         rewrite_use_address (data, use, cand);
6278         break;
6279
6280       case USE_COMPARE:
6281         rewrite_use_compare (data, use, cand);
6282         break;
6283
6284       default:
6285         gcc_unreachable ();
6286     }
6287
6288   update_stmt (use->stmt);
6289 }
6290
6291 /* Rewrite the uses using the selected induction variables.  */
6292
6293 static void
6294 rewrite_uses (struct ivopts_data *data)
6295 {
6296   unsigned i;
6297   struct iv_cand *cand;
6298   struct iv_use *use;
6299
6300   for (i = 0; i < n_iv_uses (data); i++)
6301     {
6302       use = iv_use (data, i);
6303       cand = use->selected;
6304       gcc_assert (cand);
6305
6306       rewrite_use (data, use, cand);
6307     }
6308 }
6309
6310 /* Removes the ivs that are not used after rewriting.  */
6311
6312 static void
6313 remove_unused_ivs (struct ivopts_data *data)
6314 {
6315   unsigned j;
6316   bitmap_iterator bi;
6317   bitmap toremove = BITMAP_ALLOC (NULL);
6318
6319   /* Figure out an order in which to release SSA DEFs so that we don't
6320      release something that we'd have to propagate into a debug stmt
6321      afterwards.  */
6322   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6323     {
6324       struct version_info *info;
6325
6326       info = ver_info (data, j);
6327       if (info->iv
6328           && !integer_zerop (info->iv->step)
6329           && !info->inv_id
6330           && !info->iv->have_use_for
6331           && !info->preserve_biv)
6332         bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6333     }
6334
6335   release_defs_bitset (toremove);
6336
6337   BITMAP_FREE (toremove);
6338 }
6339
6340 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6341    for pointer_map_traverse.  */
6342
6343 static bool
6344 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6345                       void *data ATTRIBUTE_UNUSED)
6346 {
6347   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6348
6349   free (niter);
6350   return true;
6351 }
6352
6353 /* Frees data allocated by the optimization of a single loop.  */
6354
6355 static void
6356 free_loop_data (struct ivopts_data *data)
6357 {
6358   unsigned i, j;
6359   bitmap_iterator bi;
6360   tree obj;
6361
6362   if (data->niters)
6363     {
6364       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6365       pointer_map_destroy (data->niters);
6366       data->niters = NULL;
6367     }
6368
6369   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6370     {
6371       struct version_info *info;
6372
6373       info = ver_info (data, i);
6374       free (info->iv);
6375       info->iv = NULL;
6376       info->has_nonlin_use = false;
6377       info->preserve_biv = false;
6378       info->inv_id = 0;
6379     }
6380   bitmap_clear (data->relevant);
6381   bitmap_clear (data->important_candidates);
6382
6383   for (i = 0; i < n_iv_uses (data); i++)
6384     {
6385       struct iv_use *use = iv_use (data, i);
6386
6387       free (use->iv);
6388       BITMAP_FREE (use->related_cands);
6389       for (j = 0; j < use->n_map_members; j++)
6390         if (use->cost_map[j].depends_on)
6391           BITMAP_FREE (use->cost_map[j].depends_on);
6392       free (use->cost_map);
6393       free (use);
6394     }
6395   VEC_truncate (iv_use_p, data->iv_uses, 0);
6396
6397   for (i = 0; i < n_iv_cands (data); i++)
6398     {
6399       struct iv_cand *cand = iv_cand (data, i);
6400
6401       free (cand->iv);
6402       if (cand->depends_on)
6403         BITMAP_FREE (cand->depends_on);
6404       free (cand);
6405     }
6406   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6407
6408   if (data->version_info_size < num_ssa_names)
6409     {
6410       data->version_info_size = 2 * num_ssa_names;
6411       free (data->version_info);
6412       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6413     }
6414
6415   data->max_inv_id = 0;
6416
6417   FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6418     SET_DECL_RTL (obj, NULL_RTX);
6419
6420   VEC_truncate (tree, decl_rtl_to_reset, 0);
6421
6422   htab_empty (data->inv_expr_tab);
6423   data->inv_expr_id = 0;
6424 }
6425
6426 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6427    loop tree.  */
6428
6429 static void
6430 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6431 {
6432   free_loop_data (data);
6433   free (data->version_info);
6434   BITMAP_FREE (data->relevant);
6435   BITMAP_FREE (data->important_candidates);
6436
6437   VEC_free (tree, heap, decl_rtl_to_reset);
6438   VEC_free (iv_use_p, heap, data->iv_uses);
6439   VEC_free (iv_cand_p, heap, data->iv_candidates);
6440   htab_delete (data->inv_expr_tab);
6441 }
6442
6443 /* Returns true if the loop body BODY includes any function calls.  */
6444
6445 static bool
6446 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6447 {
6448   gimple_stmt_iterator gsi;
6449   unsigned i;
6450
6451   for (i = 0; i < num_nodes; i++)
6452     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6453       {
6454         gimple stmt = gsi_stmt (gsi);
6455         if (is_gimple_call (stmt)
6456             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6457           return true;
6458       }
6459   return false;
6460 }
6461
6462 /* Optimizes the LOOP.  Returns true if anything changed.  */
6463
6464 static bool
6465 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6466 {
6467   bool changed = false;
6468   struct iv_ca *iv_ca;
6469   edge exit;
6470   basic_block *body;
6471
6472   gcc_assert (!data->niters);
6473   data->current_loop = loop;
6474   data->speed = optimize_loop_for_speed_p (loop);
6475
6476   if (dump_file && (dump_flags & TDF_DETAILS))
6477     {
6478       fprintf (dump_file, "Processing loop %d\n", loop->num);
6479
6480       exit = single_dom_exit (loop);
6481       if (exit)
6482         {
6483           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6484                    exit->src->index, exit->dest->index);
6485           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6486           fprintf (dump_file, "\n");
6487         }
6488
6489       fprintf (dump_file, "\n");
6490     }
6491
6492   body = get_loop_body (loop);
6493   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6494   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6495   free (body);
6496
6497   /* For each ssa name determines whether it behaves as an induction variable
6498      in some loop.  */
6499   if (!find_induction_variables (data))
6500     goto finish;
6501
6502   /* Finds interesting uses (item 1).  */
6503   find_interesting_uses (data);
6504   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6505     goto finish;
6506
6507   /* Finds candidates for the induction variables (item 2).  */
6508   find_iv_candidates (data);
6509
6510   /* Calculates the costs (item 3, part 1).  */
6511   determine_iv_costs (data);
6512   determine_use_iv_costs (data);
6513   determine_set_costs (data);
6514
6515   /* Find the optimal set of induction variables (item 3, part 2).  */
6516   iv_ca = find_optimal_iv_set (data);
6517   if (!iv_ca)
6518     goto finish;
6519   changed = true;
6520
6521   /* Create the new induction variables (item 4, part 1).  */
6522   create_new_ivs (data, iv_ca);
6523   iv_ca_free (&iv_ca);
6524
6525   /* Rewrite the uses (item 4, part 2).  */
6526   rewrite_uses (data);
6527
6528   /* Remove the ivs that are unused after rewriting.  */
6529   remove_unused_ivs (data);
6530
6531   /* We have changed the structure of induction variables; it might happen
6532      that definitions in the scev database refer to some of them that were
6533      eliminated.  */
6534   scev_reset ();
6535
6536 finish:
6537   free_loop_data (data);
6538
6539   return changed;
6540 }
6541
6542 /* Main entry point.  Optimizes induction variables in loops.  */
6543
6544 void
6545 tree_ssa_iv_optimize (void)
6546 {
6547   struct loop *loop;
6548   struct ivopts_data data;
6549   loop_iterator li;
6550
6551   tree_ssa_iv_optimize_init (&data);
6552
6553   /* Optimize the loops starting with the innermost ones.  */
6554   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6555     {
6556       if (dump_file && (dump_flags & TDF_DETAILS))
6557         flow_loop_dump (loop, dump_file, NULL, 1);
6558
6559       tree_ssa_iv_optimize_loop (&data, loop);
6560     }
6561
6562   tree_ssa_iv_optimize_finalize (&data);
6563 }