gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "tm_p.h"
  71 #include "basic-block.h"
  72 #include "output.h"
  73 #include "tree-pretty-print.h"
  74 #include "gimple-pretty-print.h"
  75 #include "tree-flow.h"
  76 #include "tree-dump.h"
  77 #include "timevar.h"
  78 #include "cfgloop.h"
  79 #include "tree-pass.h"
  80 #include "ggc.h"
  81 #include "insn-config.h"
  82 #include "recog.h"
  83 #include "pointer-set.h"
  84 #include "hashtab.h"
  85 #include "tree-chrec.h"
  86 #include "tree-scalar-evolution.h"
  87 #include "cfgloop.h"
  88 #include "params.h"
  89 #include "langhooks.h"
  90 #include "tree-affine.h"
  91 #include "target.h"
  92 #include "tree-inline.h"
  93 #include "tree-ssa-propagate.h"
  94
  95 /* FIXME: Expressions are expanded to RTL in this pass to determine the
  96    cost of different addressing modes.  This should be moved to a TBD
  97    interface between the GIMPLE and RTL worlds.  */
  98 #include "expr.h"
  99
 100 /* The infinite cost.  */
 101 #define INFTY 10000000
 102
 103 #define AVG_LOOP_NITER(LOOP) 5
 104
 105 /* Returns the expected number of loop iterations for LOOP.
 106    The average trip count is computed from profile data if it
 107    exists. */
 108
 109 static inline HOST_WIDE_INT
 110 avg_loop_niter (struct loop *loop)
 111 {
 112   HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
 113   if (niter == -1)
 114     return AVG_LOOP_NITER (loop);
 115
 116   return niter;
 117 }
 118
 119 /* Representation of the induction variable.  */
 120 struct iv
 121 {
 122   tree base;            /* Initial value of the iv.  */
 123   tree base_object;     /* A memory object to that the induction variable points.  */
 124   tree step;            /* Step of the iv (constant only).  */
 125   tree ssa_name;        /* The ssa name with the value.  */
 126   bool biv_p;           /* Is it a biv?  */
 127   bool have_use_for;    /* Do we already have a use for it?  */
 128   unsigned use_id;      /* The identifier in the use if it is the case.  */
 129 };
 130
 131 /* Per-ssa version information (induction variable descriptions, etc.).  */
 132 struct version_info
 133 {
 134   tree name;            /* The ssa name.  */
 135   struct iv *iv;        /* Induction variable description.  */
 136   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 137                            an expression that is not an induction variable.  */
 138   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 139   unsigned inv_id;      /* Id of an invariant.  */
 140 };
 141
 142 /* Types of uses.  */
 143 enum use_type
 144 {
 145   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 146   USE_ADDRESS,          /* Use in an address.  */
 147   USE_COMPARE           /* Use is a compare.  */
 148 };
 149
 150 /* Cost of a computation.  */
 151 typedef struct
 152 {
 153   int cost;             /* The runtime cost.  */
 154   unsigned complexity;  /* The estimate of the complexity of the code for
 155                            the computation (in no concrete units --
 156                            complexity field should be larger for more
 157                            complex expressions and addressing modes).  */
 158 } comp_cost;
 159
 160 static const comp_cost zero_cost = {0, 0};
 161 static const comp_cost infinite_cost = {INFTY, INFTY};
 162
 163 /* The candidate - cost pair.  */
 164 struct cost_pair
 165 {
 166   struct iv_cand *cand; /* The candidate.  */
 167   comp_cost cost;       /* The cost.  */
 168   bitmap depends_on;    /* The list of invariants that have to be
 169                            preserved.  */
 170   tree value;           /* For final value elimination, the expression for
 171                            the final value of the iv.  For iv elimination,
 172                            the new bound to compare with.  */
 173   int inv_expr_id;      /* Loop invariant expression id.  */
 174 };
 175
 176 /* Use.  */
 177 struct iv_use
 178 {
 179   unsigned id;          /* The id of the use.  */
 180   enum use_type type;   /* Type of the use.  */
 181   struct iv *iv;        /* The induction variable it is based on.  */
 182   gimple stmt;          /* Statement in that it occurs.  */
 183   tree *op_p;           /* The place where it occurs.  */
 184   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 185                            important ones.  */
 186
 187   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 188   struct cost_pair *cost_map;
 189                         /* The costs wrto the iv candidates.  */
 190
 191   struct iv_cand *selected;
 192                         /* The selected candidate.  */
 193 };
 194
 195 /* The position where the iv is computed.  */
 196 enum iv_position
 197 {
 198   IP_NORMAL,            /* At the end, just before the exit condition.  */
 199   IP_END,               /* At the end of the latch block.  */
 200   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 201   IP_AFTER_USE,         /* Immediately after a specific use.  */
 202   IP_ORIGINAL           /* The original biv.  */
 203 };
 204
 205 /* The induction variable candidate.  */
 206 struct iv_cand
 207 {
 208   unsigned id;          /* The number of the candidate.  */
 209   bool important;       /* Whether this is an "important" candidate, i.e. such
 210                            that it should be considered by all uses.  */
 211   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 212   gimple incremented_at;/* For original biv, the statement where it is
 213                            incremented.  */
 214   tree var_before;      /* The variable used for it before increment.  */
 215   tree var_after;       /* The variable used for it after increment.  */
 216   struct iv *iv;        /* The value of the candidate.  NULL for
 217                            "pseudocandidate" used to indicate the possibility
 218                            to replace the final value of an iv by direct
 219                            computation of the value.  */
 220   unsigned cost;        /* Cost of the candidate.  */
 221   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 222   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 223                               where it is incremented.  */
 224   bitmap depends_on;    /* The list of invariants that are used in step of the
 225                            biv.  */
 226 };
 227
 228 /* Loop invariant expression hashtable entry.  */
 229 struct iv_inv_expr_ent
 230 {
 231   tree expr;
 232   int id;
 233   hashval_t hash;
 234 };
 235
 236 /* The data used by the induction variable optimizations.  */
 237
 238 typedef struct iv_use *iv_use_p;
 239 DEF_VEC_P(iv_use_p);
 240 DEF_VEC_ALLOC_P(iv_use_p,heap);
 241
 242 typedef struct iv_cand *iv_cand_p;
 243 DEF_VEC_P(iv_cand_p);
 244 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 245
 246 struct ivopts_data
 247 {
 248   /* The currently optimized loop.  */
 249   struct loop *current_loop;
 250
 251   /* Numbers of iterations for all exits of the current loop.  */
 252   struct pointer_map_t *niters;
 253
 254   /* Number of registers used in it.  */
 255   unsigned regs_used;
 256
 257   /* The size of version_info array allocated.  */
 258   unsigned version_info_size;
 259
 260   /* The array of information for the ssa names.  */
 261   struct version_info *version_info;
 262
 263   /* The hashtable of loop invariant expressions created
 264      by ivopt.  */
 265   htab_t inv_expr_tab;
 266
 267   /* Loop invariant expression id.  */
 268   int inv_expr_id;
 269
 270   /* The bitmap of indices in version_info whose value was changed.  */
 271   bitmap relevant;
 272
 273   /* The uses of induction variables.  */
 274   VEC(iv_use_p,heap) *iv_uses;
 275
 276   /* The candidates.  */
 277   VEC(iv_cand_p,heap) *iv_candidates;
 278
 279   /* A bitmap of important candidates.  */
 280   bitmap important_candidates;
 281
 282   /* The maximum invariant id.  */
 283   unsigned max_inv_id;
 284
 285   /* Whether to consider just related and important candidates when replacing a
 286      use.  */
 287   bool consider_all_candidates;
 288
 289   /* Are we optimizing for speed?  */
 290   bool speed;
 291
 292   /* Whether the loop body includes any function calls.  */
 293   bool body_includes_call;
 294 };
 295
 296 /* An assignment of iv candidates to uses.  */
 297
 298 struct iv_ca
 299 {
 300   /* The number of uses covered by the assignment.  */
 301   unsigned upto;
 302
 303   /* Number of uses that cannot be expressed by the candidates in the set.  */
 304   unsigned bad_uses;
 305
 306   /* Candidate assigned to a use, together with the related costs.  */
 307   struct cost_pair **cand_for_use;
 308
 309   /* Number of times each candidate is used.  */
 310   unsigned *n_cand_uses;
 311
 312   /* The candidates used.  */
 313   bitmap cands;
 314
 315   /* The number of candidates in the set.  */
 316   unsigned n_cands;
 317
 318   /* Total number of registers needed.  */
 319   unsigned n_regs;
 320
 321   /* Total cost of expressing uses.  */
 322   comp_cost cand_use_cost;
 323
 324   /* Total cost of candidates.  */
 325   unsigned cand_cost;
 326
 327   /* Number of times each invariant is used.  */
 328   unsigned *n_invariant_uses;
 329
 330   /* The array holding the number of uses of each loop
 331      invariant expressions created by ivopt.  */
 332   unsigned *used_inv_expr;
 333
 334   /* The number of created loop invariants.  */
 335   unsigned num_used_inv_expr;
 336
 337   /* Total cost of the assignment.  */
 338   comp_cost cost;
 339 };
 340
 341 /* Difference of two iv candidate assignments.  */
 342
 343 struct iv_ca_delta
 344 {
 345   /* Changed use.  */
 346   struct iv_use *use;
 347
 348   /* An old assignment (for rollback purposes).  */
 349   struct cost_pair *old_cp;
 350
 351   /* A new assignment.  */
 352   struct cost_pair *new_cp;
 353
 354   /* Next change in the list.  */
 355   struct iv_ca_delta *next_change;
 356 };
 357
 358 /* Bound on number of candidates below that all candidates are considered.  */
 359
 360 #define CONSIDER_ALL_CANDIDATES_BOUND \
 361   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 362
 363 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 364    optimizing such a loop would help, and it would take ages).  */
 365
 366 #define MAX_CONSIDERED_USES \
 367   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 368
 369 /* If there are at most this number of ivs in the set, try removing unnecessary
 370    ivs from the set always.  */
 371
 372 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 373   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 374
 375 /* The list of trees for that the decl_rtl field must be reset is stored
 376    here.  */
 377
 378 static VEC(tree,heap) *decl_rtl_to_reset;
 379
 380 /* Number of uses recorded in DATA.  */
 381
 382 static inline unsigned
 383 n_iv_uses (struct ivopts_data *data)
 384 {
 385   return VEC_length (iv_use_p, data->iv_uses);
 386 }
 387
 388 /* Ith use recorded in DATA.  */
 389
 390 static inline struct iv_use *
 391 iv_use (struct ivopts_data *data, unsigned i)
 392 {
 393   return VEC_index (iv_use_p, data->iv_uses, i);
 394 }
 395
 396 /* Number of candidates recorded in DATA.  */
 397
 398 static inline unsigned
 399 n_iv_cands (struct ivopts_data *data)
 400 {
 401   return VEC_length (iv_cand_p, data->iv_candidates);
 402 }
 403
 404 /* Ith candidate recorded in DATA.  */
 405
 406 static inline struct iv_cand *
 407 iv_cand (struct ivopts_data *data, unsigned i)
 408 {
 409   return VEC_index (iv_cand_p, data->iv_candidates, i);
 410 }
 411
 412 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 413
 414 edge
 415 single_dom_exit (struct loop *loop)
 416 {
 417   edge exit = single_exit (loop);
 418
 419   if (!exit)
 420     return NULL;
 421
 422   if (!just_once_each_iteration_p (loop, exit->src))
 423     return NULL;
 424
 425   return exit;
 426 }
 427
 428 /* Dumps information about the induction variable IV to FILE.  */
 429
 430 extern void dump_iv (FILE *, struct iv *);
 431 void
 432 dump_iv (FILE *file, struct iv *iv)
 433 {
 434   if (iv->ssa_name)
 435     {
 436       fprintf (file, "ssa name ");
 437       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 438       fprintf (file, "\n");
 439     }
 440
 441   fprintf (file, "  type ");
 442   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 443   fprintf (file, "\n");
 444
 445   if (iv->step)
 446     {
 447       fprintf (file, "  base ");
 448       print_generic_expr (file, iv->base, TDF_SLIM);
 449       fprintf (file, "\n");
 450
 451       fprintf (file, "  step ");
 452       print_generic_expr (file, iv->step, TDF_SLIM);
 453       fprintf (file, "\n");
 454     }
 455   else
 456     {
 457       fprintf (file, "  invariant ");
 458       print_generic_expr (file, iv->base, TDF_SLIM);
 459       fprintf (file, "\n");
 460     }
 461
 462   if (iv->base_object)
 463     {
 464       fprintf (file, "  base object ");
 465       print_generic_expr (file, iv->base_object, TDF_SLIM);
 466       fprintf (file, "\n");
 467     }
 468
 469   if (iv->biv_p)
 470     fprintf (file, "  is a biv\n");
 471 }
 472
 473 /* Dumps information about the USE to FILE.  */
 474
 475 extern void dump_use (FILE *, struct iv_use *);
 476 void
 477 dump_use (FILE *file, struct iv_use *use)
 478 {
 479   fprintf (file, "use %d\n", use->id);
 480
 481   switch (use->type)
 482     {
 483     case USE_NONLINEAR_EXPR:
 484       fprintf (file, "  generic\n");
 485       break;
 486
 487     case USE_ADDRESS:
 488       fprintf (file, "  address\n");
 489       break;
 490
 491     case USE_COMPARE:
 492       fprintf (file, "  compare\n");
 493       break;
 494
 495     default:
 496       gcc_unreachable ();
 497     }
 498
 499   fprintf (file, "  in statement ");
 500   print_gimple_stmt (file, use->stmt, 0, 0);
 501   fprintf (file, "\n");
 502
 503   fprintf (file, "  at position ");
 504   if (use->op_p)
 505     print_generic_expr (file, *use->op_p, TDF_SLIM);
 506   fprintf (file, "\n");
 507
 508   dump_iv (file, use->iv);
 509
 510   if (use->related_cands)
 511     {
 512       fprintf (file, "  related candidates ");
 513       dump_bitmap (file, use->related_cands);
 514     }
 515 }
 516
 517 /* Dumps information about the uses to FILE.  */
 518
 519 extern void dump_uses (FILE *, struct ivopts_data *);
 520 void
 521 dump_uses (FILE *file, struct ivopts_data *data)
 522 {
 523   unsigned i;
 524   struct iv_use *use;
 525
 526   for (i = 0; i < n_iv_uses (data); i++)
 527     {
 528       use = iv_use (data, i);
 529
 530       dump_use (file, use);
 531       fprintf (file, "\n");
 532     }
 533 }
 534
 535 /* Dumps information about induction variable candidate CAND to FILE.  */
 536
 537 extern void dump_cand (FILE *, struct iv_cand *);
 538 void
 539 dump_cand (FILE *file, struct iv_cand *cand)
 540 {
 541   struct iv *iv = cand->iv;
 542
 543   fprintf (file, "candidate %d%s\n",
 544            cand->id, cand->important ? " (important)" : "");
 545
 546   if (cand->depends_on)
 547     {
 548       fprintf (file, "  depends on ");
 549       dump_bitmap (file, cand->depends_on);
 550     }
 551
 552   if (!iv)
 553     {
 554       fprintf (file, "  final value replacement\n");
 555       return;
 556     }
 557
 558   if (cand->var_before)
 559     {
 560       fprintf (file, "  var_before ");
 561       print_generic_expr (file, cand->var_before, TDF_SLIM);
 562       fprintf (file, "\n");
 563     }
 564   if (cand->var_after)
 565     {
 566       fprintf (file, "  var_after ");
 567       print_generic_expr (file, cand->var_after, TDF_SLIM);
 568       fprintf (file, "\n");
 569     }
 570
 571   switch (cand->pos)
 572     {
 573     case IP_NORMAL:
 574       fprintf (file, "  incremented before exit test\n");
 575       break;
 576
 577     case IP_BEFORE_USE:
 578       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 579       break;
 580
 581     case IP_AFTER_USE:
 582       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 583       break;
 584
 585     case IP_END:
 586       fprintf (file, "  incremented at end\n");
 587       break;
 588
 589     case IP_ORIGINAL:
 590       fprintf (file, "  original biv\n");
 591       break;
 592     }
 593
 594   dump_iv (file, iv);
 595 }
 596
 597 /* Returns the info for ssa version VER.  */
 598
 599 static inline struct version_info *
 600 ver_info (struct ivopts_data *data, unsigned ver)
 601 {
 602   return data->version_info + ver;
 603 }
 604
 605 /* Returns the info for ssa name NAME.  */
 606
 607 static inline struct version_info *
 608 name_info (struct ivopts_data *data, tree name)
 609 {
 610   return ver_info (data, SSA_NAME_VERSION (name));
 611 }
 612
 613 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 614    emitted in LOOP.  */
 615
 616 static bool
 617 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 618 {
 619   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 620
 621   gcc_assert (bb);
 622
 623   if (sbb == loop->latch)
 624     return true;
 625
 626   if (sbb != bb)
 627     return false;
 628
 629   return stmt == last_stmt (bb);
 630 }
 631
 632 /* Returns true if STMT if after the place where the original induction
 633    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 634    if the positions are identical.  */
 635
 636 static bool
 637 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 638 {
 639   basic_block cand_bb = gimple_bb (cand->incremented_at);
 640   basic_block stmt_bb = gimple_bb (stmt);
 641
 642   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 643     return false;
 644
 645   if (stmt_bb != cand_bb)
 646     return true;
 647
 648   if (true_if_equal
 649       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 650     return true;
 651   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 652 }
 653
 654 /* Returns true if STMT if after the place where the induction variable
 655    CAND is incremented in LOOP.  */
 656
 657 static bool
 658 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 659 {
 660   switch (cand->pos)
 661     {
 662     case IP_END:
 663       return false;
 664
 665     case IP_NORMAL:
 666       return stmt_after_ip_normal_pos (loop, stmt);
 667
 668     case IP_ORIGINAL:
 669     case IP_AFTER_USE:
 670       return stmt_after_inc_pos (cand, stmt, false);
 671
 672     case IP_BEFORE_USE:
 673       return stmt_after_inc_pos (cand, stmt, true);
 674
 675     default:
 676       gcc_unreachable ();
 677     }
 678 }
 679
 680 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 681
 682 static bool
 683 abnormal_ssa_name_p (tree exp)
 684 {
 685   if (!exp)
 686     return false;
 687
 688   if (TREE_CODE (exp) != SSA_NAME)
 689     return false;
 690
 691   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 692 }
 693
 694 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 695    abnormal phi node.  Callback for for_each_index.  */
 696
 697 static bool
 698 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 699                                   void *data ATTRIBUTE_UNUSED)
 700 {
 701   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 702     {
 703       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 704         return false;
 705       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 706         return false;
 707     }
 708
 709   return !abnormal_ssa_name_p (*index);
 710 }
 711
 712 /* Returns true if EXPR contains a ssa name that occurs in an
 713    abnormal phi node.  */
 714
 715 bool
 716 contains_abnormal_ssa_name_p (tree expr)
 717 {
 718   enum tree_code code;
 719   enum tree_code_class codeclass;
 720
 721   if (!expr)
 722     return false;
 723
 724   code = TREE_CODE (expr);
 725   codeclass = TREE_CODE_CLASS (code);
 726
 727   if (code == SSA_NAME)
 728     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 729
 730   if (code == INTEGER_CST
 731       || is_gimple_min_invariant (expr))
 732     return false;
 733
 734   if (code == ADDR_EXPR)
 735     return !for_each_index (&TREE_OPERAND (expr, 0),
 736                             idx_contains_abnormal_ssa_name_p,
 737                             NULL);
 738
 739   if (code == COND_EXPR)
 740     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 741       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 742       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 743
 744   switch (codeclass)
 745     {
 746     case tcc_binary:
 747     case tcc_comparison:
 748       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 749         return true;
 750
 751       /* Fallthru.  */
 752     case tcc_unary:
 753       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 754         return true;
 755
 756       break;
 757
 758     default:
 759       gcc_unreachable ();
 760     }
 761
 762   return false;
 763 }
 764
 765 /*  Returns tree describing number of iterations determined from
 766     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 767
 768 static tree
 769 niter_for_exit (struct ivopts_data *data, edge exit,
 770                 struct tree_niter_desc **desc_p)
 771 {
 772   struct tree_niter_desc* desc = NULL;
 773   tree niter;
 774   void **slot;
 775
 776   if (!data->niters)
 777     {
 778       data->niters = pointer_map_create ();
 779       slot = NULL;
 780     }
 781   else
 782     slot = pointer_map_contains (data->niters, exit);
 783
 784   if (!slot)
 785     {
 786       /* Try to determine number of iterations.  We must know it
 787          unconditionally (i.e., without possibility of # of iterations
 788          being zero).  Also, we cannot safely work with ssa names that
 789          appear in phi nodes on abnormal edges, so that we do not create
 790          overlapping life ranges for them (PR 27283).  */
 791       desc = XNEW (struct tree_niter_desc);
 792       if (number_of_iterations_exit (data->current_loop,
 793                                      exit, desc, true)
 794           && integer_zerop (desc->may_be_zero)
 795           && !contains_abnormal_ssa_name_p (desc->niter))
 796         niter = desc->niter;
 797       else
 798         niter = NULL_TREE;
 799
 800       desc->niter = niter;
 801       slot = pointer_map_insert (data->niters, exit);
 802       *slot = desc;
 803     }
 804   else
 805     niter = ((struct tree_niter_desc *) *slot)->niter;
 806
 807   if (desc_p)
 808     *desc_p = (struct tree_niter_desc *) *slot;
 809   return niter;
 810 }
 811
 812 /* Returns tree describing number of iterations determined from
 813    single dominating exit of DATA->current_loop, or NULL if something
 814    goes wrong.  */
 815
 816 static tree
 817 niter_for_single_dom_exit (struct ivopts_data *data)
 818 {
 819   edge exit = single_dom_exit (data->current_loop);
 820
 821   if (!exit)
 822     return NULL;
 823
 824   return niter_for_exit (data, exit, NULL);
 825 }
 826
 827 /* Hash table equality function for expressions.  */
 828
 829 static int
 830 htab_inv_expr_eq (const void *ent1, const void *ent2)
 831 {
 832   const struct iv_inv_expr_ent *expr1 =
 833       (const struct iv_inv_expr_ent *)ent1;
 834   const struct iv_inv_expr_ent *expr2 =
 835       (const struct iv_inv_expr_ent *)ent2;
 836
 837   return operand_equal_p (expr1->expr, expr2->expr, 0);
 838 }
 839
 840 /* Hash function for loop invariant expressions.  */
 841
 842 static hashval_t
 843 htab_inv_expr_hash (const void *ent)
 844 {
 845   const struct iv_inv_expr_ent *expr =
 846       (const struct iv_inv_expr_ent *)ent;
 847   return expr->hash;
 848 }
 849
 850 /* Initializes data structures used by the iv optimization pass, stored
 851    in DATA.  */
 852
 853 static void
 854 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 855 {
 856   data->version_info_size = 2 * num_ssa_names;
 857   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 858   data->relevant = BITMAP_ALLOC (NULL);
 859   data->important_candidates = BITMAP_ALLOC (NULL);
 860   data->max_inv_id = 0;
 861   data->niters = NULL;
 862   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 863   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 864   data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
 865                                     htab_inv_expr_eq, free);
 866   data->inv_expr_id = 0;
 867   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 868 }
 869
 870 /* Returns a memory object to that EXPR points.  In case we are able to
 871    determine that it does not point to any such object, NULL is returned.  */
 872
 873 static tree
 874 determine_base_object (tree expr)
 875 {
 876   enum tree_code code = TREE_CODE (expr);
 877   tree base, obj;
 878
 879   /* If this is a pointer casted to any type, we need to determine
 880      the base object for the pointer; so handle conversions before
 881      throwing away non-pointer expressions.  */
 882   if (CONVERT_EXPR_P (expr))
 883     return determine_base_object (TREE_OPERAND (expr, 0));
 884
 885   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 886     return NULL_TREE;
 887
 888   switch (code)
 889     {
 890     case INTEGER_CST:
 891       return NULL_TREE;
 892
 893     case ADDR_EXPR:
 894       obj = TREE_OPERAND (expr, 0);
 895       base = get_base_address (obj);
 896
 897       if (!base)
 898         return expr;
 899
 900       if (TREE_CODE (base) == MEM_REF)
 901         return determine_base_object (TREE_OPERAND (base, 0));
 902
 903       return fold_convert (ptr_type_node,
 904                            build_fold_addr_expr (base));
 905
 906     case POINTER_PLUS_EXPR:
 907       return determine_base_object (TREE_OPERAND (expr, 0));
 908
 909     case PLUS_EXPR:
 910     case MINUS_EXPR:
 911       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 912       gcc_unreachable ();
 913
 914     default:
 915       return fold_convert (ptr_type_node, expr);
 916     }
 917 }
 918
 919 /* Allocates an induction variable with given initial value BASE and step STEP
 920    for loop LOOP.  */
 921
 922 static struct iv *
 923 alloc_iv (tree base, tree step)
 924 {
 925   struct iv *iv = XCNEW (struct iv);
 926   gcc_assert (step != NULL_TREE);
 927
 928   iv->base = base;
 929   iv->base_object = determine_base_object (base);
 930   iv->step = step;
 931   iv->biv_p = false;
 932   iv->have_use_for = false;
 933   iv->use_id = 0;
 934   iv->ssa_name = NULL_TREE;
 935
 936   return iv;
 937 }
 938
 939 /* Sets STEP and BASE for induction variable IV.  */
 940
 941 static void
 942 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 943 {
 944   struct version_info *info = name_info (data, iv);
 945
 946   gcc_assert (!info->iv);
 947
 948   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 949   info->iv = alloc_iv (base, step);
 950   info->iv->ssa_name = iv;
 951 }
 952
 953 /* Finds induction variable declaration for VAR.  */
 954
 955 static struct iv *
 956 get_iv (struct ivopts_data *data, tree var)
 957 {
 958   basic_block bb;
 959   tree type = TREE_TYPE (var);
 960
 961   if (!POINTER_TYPE_P (type)
 962       && !INTEGRAL_TYPE_P (type))
 963     return NULL;
 964
 965   if (!name_info (data, var)->iv)
 966     {
 967       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 968
 969       if (!bb
 970           || !flow_bb_inside_loop_p (data->current_loop, bb))
 971         set_iv (data, var, var, build_int_cst (type, 0));
 972     }
 973
 974   return name_info (data, var)->iv;
 975 }
 976
 977 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 978    not define a simple affine biv with nonzero step.  */
 979
 980 static tree
 981 determine_biv_step (gimple phi)
 982 {
 983   struct loop *loop = gimple_bb (phi)->loop_father;
 984   tree name = PHI_RESULT (phi);
 985   affine_iv iv;
 986
 987   if (!is_gimple_reg (name))
 988     return NULL_TREE;
 989
 990   if (!simple_iv (loop, loop, name, &iv, true))
 991     return NULL_TREE;
 992
 993   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 994 }
 995
 996 /* Finds basic ivs.  */
 997
 998 static bool
 999 find_bivs (struct ivopts_data *data)
1000 {
1001   gimple phi;
1002   tree step, type, base;
1003   bool found = false;
1004   struct loop *loop = data->current_loop;
1005   gimple_stmt_iterator psi;
1006
1007   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1008     {
1009       phi = gsi_stmt (psi);
1010
1011       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1012         continue;
1013
1014       step = determine_biv_step (phi);
1015       if (!step)
1016         continue;
1017
1018       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1019       base = expand_simple_operations (base);
1020       if (contains_abnormal_ssa_name_p (base)
1021           || contains_abnormal_ssa_name_p (step))
1022         continue;
1023
1024       type = TREE_TYPE (PHI_RESULT (phi));
1025       base = fold_convert (type, base);
1026       if (step)
1027         {
1028           if (POINTER_TYPE_P (type))
1029             step = fold_convert (sizetype, step);
1030           else
1031             step = fold_convert (type, step);
1032         }
1033
1034       set_iv (data, PHI_RESULT (phi), base, step);
1035       found = true;
1036     }
1037
1038   return found;
1039 }
1040
1041 /* Marks basic ivs.  */
1042
1043 static void
1044 mark_bivs (struct ivopts_data *data)
1045 {
1046   gimple phi;
1047   tree var;
1048   struct iv *iv, *incr_iv;
1049   struct loop *loop = data->current_loop;
1050   basic_block incr_bb;
1051   gimple_stmt_iterator psi;
1052
1053   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1054     {
1055       phi = gsi_stmt (psi);
1056
1057       iv = get_iv (data, PHI_RESULT (phi));
1058       if (!iv)
1059         continue;
1060
1061       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1062       incr_iv = get_iv (data, var);
1063       if (!incr_iv)
1064         continue;
1065
1066       /* If the increment is in the subloop, ignore it.  */
1067       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1068       if (incr_bb->loop_father != data->current_loop
1069           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1070         continue;
1071
1072       iv->biv_p = true;
1073       incr_iv->biv_p = true;
1074     }
1075 }
1076
1077 /* Checks whether STMT defines a linear induction variable and stores its
1078    parameters to IV.  */
1079
1080 static bool
1081 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1082 {
1083   tree lhs;
1084   struct loop *loop = data->current_loop;
1085
1086   iv->base = NULL_TREE;
1087   iv->step = NULL_TREE;
1088
1089   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1090     return false;
1091
1092   lhs = gimple_assign_lhs (stmt);
1093   if (TREE_CODE (lhs) != SSA_NAME)
1094     return false;
1095
1096   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1097     return false;
1098   iv->base = expand_simple_operations (iv->base);
1099
1100   if (contains_abnormal_ssa_name_p (iv->base)
1101       || contains_abnormal_ssa_name_p (iv->step))
1102     return false;
1103
1104   return true;
1105 }
1106
1107 /* Finds general ivs in statement STMT.  */
1108
1109 static void
1110 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1111 {
1112   affine_iv iv;
1113
1114   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1115     return;
1116
1117   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1118 }
1119
1120 /* Finds general ivs in basic block BB.  */
1121
1122 static void
1123 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1124 {
1125   gimple_stmt_iterator bsi;
1126
1127   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1128     find_givs_in_stmt (data, gsi_stmt (bsi));
1129 }
1130
1131 /* Finds general ivs.  */
1132
1133 static void
1134 find_givs (struct ivopts_data *data)
1135 {
1136   struct loop *loop = data->current_loop;
1137   basic_block *body = get_loop_body_in_dom_order (loop);
1138   unsigned i;
1139
1140   for (i = 0; i < loop->num_nodes; i++)
1141     find_givs_in_bb (data, body[i]);
1142   free (body);
1143 }
1144
1145 /* For each ssa name defined in LOOP determines whether it is an induction
1146    variable and if so, its initial value and step.  */
1147
1148 static bool
1149 find_induction_variables (struct ivopts_data *data)
1150 {
1151   unsigned i;
1152   bitmap_iterator bi;
1153
1154   if (!find_bivs (data))
1155     return false;
1156
1157   find_givs (data);
1158   mark_bivs (data);
1159
1160   if (dump_file && (dump_flags & TDF_DETAILS))
1161     {
1162       tree niter = niter_for_single_dom_exit (data);
1163
1164       if (niter)
1165         {
1166           fprintf (dump_file, "  number of iterations ");
1167           print_generic_expr (dump_file, niter, TDF_SLIM);
1168           fprintf (dump_file, "\n\n");
1169         };
1170
1171       fprintf (dump_file, "Induction variables:\n\n");
1172
1173       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1174         {
1175           if (ver_info (data, i)->iv)
1176             dump_iv (dump_file, ver_info (data, i)->iv);
1177         }
1178     }
1179
1180   return true;
1181 }
1182
1183 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1184
1185 static struct iv_use *
1186 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1187             gimple stmt, enum use_type use_type)
1188 {
1189   struct iv_use *use = XCNEW (struct iv_use);
1190
1191   use->id = n_iv_uses (data);
1192   use->type = use_type;
1193   use->iv = iv;
1194   use->stmt = stmt;
1195   use->op_p = use_p;
1196   use->related_cands = BITMAP_ALLOC (NULL);
1197
1198   /* To avoid showing ssa name in the dumps, if it was not reset by the
1199      caller.  */
1200   iv->ssa_name = NULL_TREE;
1201
1202   if (dump_file && (dump_flags & TDF_DETAILS))
1203     dump_use (dump_file, use);
1204
1205   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1206
1207   return use;
1208 }
1209
1210 /* Checks whether OP is a loop-level invariant and if so, records it.
1211    NONLINEAR_USE is true if the invariant is used in a way we do not
1212    handle specially.  */
1213
1214 static void
1215 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1216 {
1217   basic_block bb;
1218   struct version_info *info;
1219
1220   if (TREE_CODE (op) != SSA_NAME
1221       || !is_gimple_reg (op))
1222     return;
1223
1224   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1225   if (bb
1226       && flow_bb_inside_loop_p (data->current_loop, bb))
1227     return;
1228
1229   info = name_info (data, op);
1230   info->name = op;
1231   info->has_nonlin_use |= nonlinear_use;
1232   if (!info->inv_id)
1233     info->inv_id = ++data->max_inv_id;
1234   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1235 }
1236
1237 /* Checks whether the use OP is interesting and if so, records it.  */
1238
1239 static struct iv_use *
1240 find_interesting_uses_op (struct ivopts_data *data, tree op)
1241 {
1242   struct iv *iv;
1243   struct iv *civ;
1244   gimple stmt;
1245   struct iv_use *use;
1246
1247   if (TREE_CODE (op) != SSA_NAME)
1248     return NULL;
1249
1250   iv = get_iv (data, op);
1251   if (!iv)
1252     return NULL;
1253
1254   if (iv->have_use_for)
1255     {
1256       use = iv_use (data, iv->use_id);
1257
1258       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1259       return use;
1260     }
1261
1262   if (integer_zerop (iv->step))
1263     {
1264       record_invariant (data, op, true);
1265       return NULL;
1266     }
1267   iv->have_use_for = true;
1268
1269   civ = XNEW (struct iv);
1270   *civ = *iv;
1271
1272   stmt = SSA_NAME_DEF_STMT (op);
1273   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1274               || is_gimple_assign (stmt));
1275
1276   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1277   iv->use_id = use->id;
1278
1279   return use;
1280 }
1281
1282 /* Given a condition in statement STMT, checks whether it is a compare
1283    of an induction variable and an invariant.  If this is the case,
1284    CONTROL_VAR is set to location of the iv, BOUND to the location of
1285    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1286    induction variable descriptions, and true is returned.  If this is not
1287    the case, CONTROL_VAR and BOUND are set to the arguments of the
1288    condition and false is returned.  */
1289
1290 static bool
1291 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1292                        tree **control_var, tree **bound,
1293                        struct iv **iv_var, struct iv **iv_bound)
1294 {
1295   /* The objects returned when COND has constant operands.  */
1296   static struct iv const_iv;
1297   static tree zero;
1298   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1299   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1300   bool ret = false;
1301
1302   if (gimple_code (stmt) == GIMPLE_COND)
1303     {
1304       op0 = gimple_cond_lhs_ptr (stmt);
1305       op1 = gimple_cond_rhs_ptr (stmt);
1306     }
1307   else
1308     {
1309       op0 = gimple_assign_rhs1_ptr (stmt);
1310       op1 = gimple_assign_rhs2_ptr (stmt);
1311     }
1312
1313   zero = integer_zero_node;
1314   const_iv.step = integer_zero_node;
1315
1316   if (TREE_CODE (*op0) == SSA_NAME)
1317     iv0 = get_iv (data, *op0);
1318   if (TREE_CODE (*op1) == SSA_NAME)
1319     iv1 = get_iv (data, *op1);
1320
1321   /* Exactly one of the compared values must be an iv, and the other one must
1322      be an invariant.  */
1323   if (!iv0 || !iv1)
1324     goto end;
1325
1326   if (integer_zerop (iv0->step))
1327     {
1328       /* Control variable may be on the other side.  */
1329       tmp_op = op0; op0 = op1; op1 = tmp_op;
1330       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1331     }
1332   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1333
1334 end:
1335   if (control_var)
1336     *control_var = op0;;
1337   if (iv_var)
1338     *iv_var = iv0;;
1339   if (bound)
1340     *bound = op1;
1341   if (iv_bound)
1342     *iv_bound = iv1;
1343
1344   return ret;
1345 }
1346
1347 /* Checks whether the condition in STMT is interesting and if so,
1348    records it.  */
1349
1350 static void
1351 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1352 {
1353   tree *var_p, *bound_p;
1354   struct iv *var_iv, *civ;
1355
1356   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1357     {
1358       find_interesting_uses_op (data, *var_p);
1359       find_interesting_uses_op (data, *bound_p);
1360       return;
1361     }
1362
1363   civ = XNEW (struct iv);
1364   *civ = *var_iv;
1365   record_use (data, NULL, civ, stmt, USE_COMPARE);
1366 }
1367
1368 /* Returns true if expression EXPR is obviously invariant in LOOP,
1369    i.e. if all its operands are defined outside of the LOOP.  LOOP
1370    should not be the function body.  */
1371
1372 bool
1373 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1374 {
1375   basic_block def_bb;
1376   unsigned i, len;
1377
1378   gcc_assert (loop_depth (loop) > 0);
1379
1380   if (is_gimple_min_invariant (expr))
1381     return true;
1382
1383   if (TREE_CODE (expr) == SSA_NAME)
1384     {
1385       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1386       if (def_bb
1387           && flow_bb_inside_loop_p (loop, def_bb))
1388         return false;
1389
1390       return true;
1391     }
1392
1393   if (!EXPR_P (expr))
1394     return false;
1395
1396   len = TREE_OPERAND_LENGTH (expr);
1397   for (i = 0; i < len; i++)
1398     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1399       return false;
1400
1401   return true;
1402 }
1403
1404 /* Returns true if statement STMT is obviously invariant in LOOP,
1405    i.e. if all its operands on the RHS are defined outside of the LOOP.
1406    LOOP should not be the function body.  */
1407
1408 bool
1409 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1410 {
1411   unsigned i;
1412   tree lhs;
1413
1414   gcc_assert (loop_depth (loop) > 0);
1415
1416   lhs = gimple_get_lhs (stmt);
1417   for (i = 0; i < gimple_num_ops (stmt); i++)
1418     {
1419       tree op = gimple_op (stmt, i);
1420       if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1421         return false;
1422     }
1423
1424   return true;
1425 }
1426
1427 /* Cumulates the steps of indices into DATA and replaces their values with the
1428    initial ones.  Returns false when the value of the index cannot be determined.
1429    Callback for for_each_index.  */
1430
1431 struct ifs_ivopts_data
1432 {
1433   struct ivopts_data *ivopts_data;
1434   gimple stmt;
1435   tree step;
1436 };
1437
1438 static bool
1439 idx_find_step (tree base, tree *idx, void *data)
1440 {
1441   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1442   struct iv *iv;
1443   tree step, iv_base, iv_step, lbound, off;
1444   struct loop *loop = dta->ivopts_data->current_loop;
1445
1446   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF)
1447     return false;
1448
1449   /* If base is a component ref, require that the offset of the reference
1450      be invariant.  */
1451   if (TREE_CODE (base) == COMPONENT_REF)
1452     {
1453       off = component_ref_field_offset (base);
1454       return expr_invariant_in_loop_p (loop, off);
1455     }
1456
1457   /* If base is array, first check whether we will be able to move the
1458      reference out of the loop (in order to take its address in strength
1459      reduction).  In order for this to work we need both lower bound
1460      and step to be loop invariants.  */
1461   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1462     {
1463       /* Moreover, for a range, the size needs to be invariant as well.  */
1464       if (TREE_CODE (base) == ARRAY_RANGE_REF
1465           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1466         return false;
1467
1468       step = array_ref_element_size (base);
1469       lbound = array_ref_low_bound (base);
1470
1471       if (!expr_invariant_in_loop_p (loop, step)
1472           || !expr_invariant_in_loop_p (loop, lbound))
1473         return false;
1474     }
1475
1476   if (TREE_CODE (*idx) != SSA_NAME)
1477     return true;
1478
1479   iv = get_iv (dta->ivopts_data, *idx);
1480   if (!iv)
1481     return false;
1482
1483   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1484           *&x[0], which is not folded and does not trigger the
1485           ARRAY_REF path below.  */
1486   *idx = iv->base;
1487
1488   if (integer_zerop (iv->step))
1489     return true;
1490
1491   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1492     {
1493       step = array_ref_element_size (base);
1494
1495       /* We only handle addresses whose step is an integer constant.  */
1496       if (TREE_CODE (step) != INTEGER_CST)
1497         return false;
1498     }
1499   else
1500     /* The step for pointer arithmetics already is 1 byte.  */
1501     step = size_one_node;
1502
1503   iv_base = iv->base;
1504   iv_step = iv->step;
1505   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1506                             sizetype, &iv_base, &iv_step, dta->stmt,
1507                             false))
1508     {
1509       /* The index might wrap.  */
1510       return false;
1511     }
1512
1513   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1514   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1515
1516   return true;
1517 }
1518
1519 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1520    object is passed to it in DATA.  */
1521
1522 static bool
1523 idx_record_use (tree base, tree *idx,
1524                 void *vdata)
1525 {
1526   struct ivopts_data *data = (struct ivopts_data *) vdata;
1527   find_interesting_uses_op (data, *idx);
1528   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1529     {
1530       find_interesting_uses_op (data, array_ref_element_size (base));
1531       find_interesting_uses_op (data, array_ref_low_bound (base));
1532     }
1533   return true;
1534 }
1535
1536 /* If we can prove that TOP = cst * BOT for some constant cst,
1537    store cst to MUL and return true.  Otherwise return false.
1538    The returned value is always sign-extended, regardless of the
1539    signedness of TOP and BOT.  */
1540
1541 static bool
1542 constant_multiple_of (tree top, tree bot, double_int *mul)
1543 {
1544   tree mby;
1545   enum tree_code code;
1546   double_int res, p0, p1;
1547   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1548
1549   STRIP_NOPS (top);
1550   STRIP_NOPS (bot);
1551
1552   if (operand_equal_p (top, bot, 0))
1553     {
1554       *mul = double_int_one;
1555       return true;
1556     }
1557
1558   code = TREE_CODE (top);
1559   switch (code)
1560     {
1561     case MULT_EXPR:
1562       mby = TREE_OPERAND (top, 1);
1563       if (TREE_CODE (mby) != INTEGER_CST)
1564         return false;
1565
1566       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1567         return false;
1568
1569       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1570                               precision);
1571       return true;
1572
1573     case PLUS_EXPR:
1574     case MINUS_EXPR:
1575       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1576           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1577         return false;
1578
1579       if (code == MINUS_EXPR)
1580         p1 = double_int_neg (p1);
1581       *mul = double_int_sext (double_int_add (p0, p1), precision);
1582       return true;
1583
1584     case INTEGER_CST:
1585       if (TREE_CODE (bot) != INTEGER_CST)
1586         return false;
1587
1588       p0 = double_int_sext (tree_to_double_int (top), precision);
1589       p1 = double_int_sext (tree_to_double_int (bot), precision);
1590       if (double_int_zero_p (p1))
1591         return false;
1592       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1593                               precision);
1594       return double_int_zero_p (res);
1595
1596     default:
1597       return false;
1598     }
1599 }
1600
1601 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1602
1603 static bool
1604 may_be_unaligned_p (tree ref, tree step)
1605 {
1606   tree base;
1607   tree base_type;
1608   HOST_WIDE_INT bitsize;
1609   HOST_WIDE_INT bitpos;
1610   tree toffset;
1611   enum machine_mode mode;
1612   int unsignedp, volatilep;
1613   unsigned base_align;
1614
1615   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1616      thus they are not misaligned.  */
1617   if (TREE_CODE (ref) == TARGET_MEM_REF)
1618     return false;
1619
1620   /* The test below is basically copy of what expr.c:normal_inner_ref
1621      does to check whether the object must be loaded by parts when
1622      STRICT_ALIGNMENT is true.  */
1623   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1624                               &unsignedp, &volatilep, true);
1625   base_type = TREE_TYPE (base);
1626   base_align = TYPE_ALIGN (base_type);
1627
1628   if (mode != BLKmode)
1629     {
1630       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1631
1632       if (base_align < mode_align
1633           || (bitpos % mode_align) != 0
1634           || (bitpos % BITS_PER_UNIT) != 0)
1635         return true;
1636
1637       if (toffset
1638           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1639         return true;
1640
1641       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1642         return true;
1643     }
1644
1645   return false;
1646 }
1647
1648 /* Return true if EXPR may be non-addressable.   */
1649
1650 bool
1651 may_be_nonaddressable_p (tree expr)
1652 {
1653   switch (TREE_CODE (expr))
1654     {
1655     case TARGET_MEM_REF:
1656       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1657          target, thus they are always addressable.  */
1658       return false;
1659
1660     case COMPONENT_REF:
1661       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1662              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1663
1664     case VIEW_CONVERT_EXPR:
1665       /* This kind of view-conversions may wrap non-addressable objects
1666          and make them look addressable.  After some processing the
1667          non-addressability may be uncovered again, causing ADDR_EXPRs
1668          of inappropriate objects to be built.  */
1669       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1670           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1671         return true;
1672
1673       /* ... fall through ... */
1674
1675     case ARRAY_REF:
1676     case ARRAY_RANGE_REF:
1677       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1678
1679     CASE_CONVERT:
1680       return true;
1681
1682     default:
1683       break;
1684     }
1685
1686   return false;
1687 }
1688
1689 /* Finds addresses in *OP_P inside STMT.  */
1690
1691 static void
1692 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1693 {
1694   tree base = *op_p, step = size_zero_node;
1695   struct iv *civ;
1696   struct ifs_ivopts_data ifs_ivopts_data;
1697
1698   /* Do not play with volatile memory references.  A bit too conservative,
1699      perhaps, but safe.  */
1700   if (gimple_has_volatile_ops (stmt))
1701     goto fail;
1702
1703   /* Ignore bitfields for now.  Not really something terribly complicated
1704      to handle.  TODO.  */
1705   if (TREE_CODE (base) == BIT_FIELD_REF)
1706     goto fail;
1707
1708   base = unshare_expr (base);
1709
1710   if (TREE_CODE (base) == TARGET_MEM_REF)
1711     {
1712       tree type = build_pointer_type (TREE_TYPE (base));
1713       tree astep;
1714
1715       if (TMR_BASE (base)
1716           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1717         {
1718           civ = get_iv (data, TMR_BASE (base));
1719           if (!civ)
1720             goto fail;
1721
1722           TMR_BASE (base) = civ->base;
1723           step = civ->step;
1724         }
1725       if (TMR_INDEX (base)
1726           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1727         {
1728           civ = get_iv (data, TMR_INDEX (base));
1729           if (!civ)
1730             goto fail;
1731
1732           TMR_INDEX (base) = civ->base;
1733           astep = civ->step;
1734
1735           if (astep)
1736             {
1737               if (TMR_STEP (base))
1738                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1739
1740               step = fold_build2 (PLUS_EXPR, type, step, astep);
1741             }
1742         }
1743
1744       if (integer_zerop (step))
1745         goto fail;
1746       base = tree_mem_ref_addr (type, base);
1747     }
1748   else
1749     {
1750       ifs_ivopts_data.ivopts_data = data;
1751       ifs_ivopts_data.stmt = stmt;
1752       ifs_ivopts_data.step = size_zero_node;
1753       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1754           || integer_zerop (ifs_ivopts_data.step))
1755         goto fail;
1756       step = ifs_ivopts_data.step;
1757
1758       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1759
1760       /* Check that the base expression is addressable.  This needs
1761          to be done after substituting bases of IVs into it.  */
1762       if (may_be_nonaddressable_p (base))
1763         goto fail;
1764
1765       /* Moreover, on strict alignment platforms, check that it is
1766          sufficiently aligned.  */
1767       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1768         goto fail;
1769
1770       base = build_fold_addr_expr (base);
1771
1772       /* Substituting bases of IVs into the base expression might
1773          have caused folding opportunities.  */
1774       if (TREE_CODE (base) == ADDR_EXPR)
1775         {
1776           tree *ref = &TREE_OPERAND (base, 0);
1777           while (handled_component_p (*ref))
1778             ref = &TREE_OPERAND (*ref, 0);
1779           if (TREE_CODE (*ref) == MEM_REF)
1780             {
1781               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1782                                       TREE_OPERAND (*ref, 0),
1783                                       TREE_OPERAND (*ref, 1));
1784               if (tem)
1785                 *ref = tem;
1786             }
1787         }
1788     }
1789
1790   civ = alloc_iv (base, step);
1791   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1792   return;
1793
1794 fail:
1795   for_each_index (op_p, idx_record_use, data);
1796 }
1797
1798 /* Finds and records invariants used in STMT.  */
1799
1800 static void
1801 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1802 {
1803   ssa_op_iter iter;
1804   use_operand_p use_p;
1805   tree op;
1806
1807   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1808     {
1809       op = USE_FROM_PTR (use_p);
1810       record_invariant (data, op, false);
1811     }
1812 }
1813
1814 /* Finds interesting uses of induction variables in the statement STMT.  */
1815
1816 static void
1817 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1818 {
1819   struct iv *iv;
1820   tree op, *lhs, *rhs;
1821   ssa_op_iter iter;
1822   use_operand_p use_p;
1823   enum tree_code code;
1824
1825   find_invariants_stmt (data, stmt);
1826
1827   if (gimple_code (stmt) == GIMPLE_COND)
1828     {
1829       find_interesting_uses_cond (data, stmt);
1830       return;
1831     }
1832
1833   if (is_gimple_assign (stmt))
1834     {
1835       lhs = gimple_assign_lhs_ptr (stmt);
1836       rhs = gimple_assign_rhs1_ptr (stmt);
1837
1838       if (TREE_CODE (*lhs) == SSA_NAME)
1839         {
1840           /* If the statement defines an induction variable, the uses are not
1841              interesting by themselves.  */
1842
1843           iv = get_iv (data, *lhs);
1844
1845           if (iv && !integer_zerop (iv->step))
1846             return;
1847         }
1848
1849       code = gimple_assign_rhs_code (stmt);
1850       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1851           && (REFERENCE_CLASS_P (*rhs)
1852               || is_gimple_val (*rhs)))
1853         {
1854           if (REFERENCE_CLASS_P (*rhs))
1855             find_interesting_uses_address (data, stmt, rhs);
1856           else
1857             find_interesting_uses_op (data, *rhs);
1858
1859           if (REFERENCE_CLASS_P (*lhs))
1860             find_interesting_uses_address (data, stmt, lhs);
1861           return;
1862         }
1863       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1864         {
1865           find_interesting_uses_cond (data, stmt);
1866           return;
1867         }
1868
1869       /* TODO -- we should also handle address uses of type
1870
1871          memory = call (whatever);
1872
1873          and
1874
1875          call (memory).  */
1876     }
1877
1878   if (gimple_code (stmt) == GIMPLE_PHI
1879       && gimple_bb (stmt) == data->current_loop->header)
1880     {
1881       iv = get_iv (data, PHI_RESULT (stmt));
1882
1883       if (iv && !integer_zerop (iv->step))
1884         return;
1885     }
1886
1887   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1888     {
1889       op = USE_FROM_PTR (use_p);
1890
1891       if (TREE_CODE (op) != SSA_NAME)
1892         continue;
1893
1894       iv = get_iv (data, op);
1895       if (!iv)
1896         continue;
1897
1898       find_interesting_uses_op (data, op);
1899     }
1900 }
1901
1902 /* Finds interesting uses of induction variables outside of loops
1903    on loop exit edge EXIT.  */
1904
1905 static void
1906 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1907 {
1908   gimple phi;
1909   gimple_stmt_iterator psi;
1910   tree def;
1911
1912   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1913     {
1914       phi = gsi_stmt (psi);
1915       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1916       if (is_gimple_reg (def))
1917         find_interesting_uses_op (data, def);
1918     }
1919 }
1920
1921 /* Finds uses of the induction variables that are interesting.  */
1922
1923 static void
1924 find_interesting_uses (struct ivopts_data *data)
1925 {
1926   basic_block bb;
1927   gimple_stmt_iterator bsi;
1928   basic_block *body = get_loop_body (data->current_loop);
1929   unsigned i;
1930   struct version_info *info;
1931   edge e;
1932
1933   if (dump_file && (dump_flags & TDF_DETAILS))
1934     fprintf (dump_file, "Uses:\n\n");
1935
1936   for (i = 0; i < data->current_loop->num_nodes; i++)
1937     {
1938       edge_iterator ei;
1939       bb = body[i];
1940
1941       FOR_EACH_EDGE (e, ei, bb->succs)
1942         if (e->dest != EXIT_BLOCK_PTR
1943             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1944           find_interesting_uses_outside (data, e);
1945
1946       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1947         find_interesting_uses_stmt (data, gsi_stmt (bsi));
1948       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1949         if (!is_gimple_debug (gsi_stmt (bsi)))
1950           find_interesting_uses_stmt (data, gsi_stmt (bsi));
1951     }
1952
1953   if (dump_file && (dump_flags & TDF_DETAILS))
1954     {
1955       bitmap_iterator bi;
1956
1957       fprintf (dump_file, "\n");
1958
1959       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1960         {
1961           info = ver_info (data, i);
1962           if (info->inv_id)
1963             {
1964               fprintf (dump_file, "  ");
1965               print_generic_expr (dump_file, info->name, TDF_SLIM);
1966               fprintf (dump_file, " is invariant (%d)%s\n",
1967                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1968             }
1969         }
1970
1971       fprintf (dump_file, "\n");
1972     }
1973
1974   free (body);
1975 }
1976
1977 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1978    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1979    we are at the top-level of the processed address.  */
1980
1981 static tree
1982 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1983                 unsigned HOST_WIDE_INT *offset)
1984 {
1985   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1986   enum tree_code code;
1987   tree type, orig_type = TREE_TYPE (expr);
1988   unsigned HOST_WIDE_INT off0, off1, st;
1989   tree orig_expr = expr;
1990
1991   STRIP_NOPS (expr);
1992
1993   type = TREE_TYPE (expr);
1994   code = TREE_CODE (expr);
1995   *offset = 0;
1996
1997   switch (code)
1998     {
1999     case INTEGER_CST:
2000       if (!cst_and_fits_in_hwi (expr)
2001           || integer_zerop (expr))
2002         return orig_expr;
2003
2004       *offset = int_cst_value (expr);
2005       return build_int_cst (orig_type, 0);
2006
2007     case POINTER_PLUS_EXPR:
2008     case PLUS_EXPR:
2009     case MINUS_EXPR:
2010       op0 = TREE_OPERAND (expr, 0);
2011       op1 = TREE_OPERAND (expr, 1);
2012
2013       op0 = strip_offset_1 (op0, false, false, &off0);
2014       op1 = strip_offset_1 (op1, false, false, &off1);
2015
2016       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2017       if (op0 == TREE_OPERAND (expr, 0)
2018           && op1 == TREE_OPERAND (expr, 1))
2019         return orig_expr;
2020
2021       if (integer_zerop (op1))
2022         expr = op0;
2023       else if (integer_zerop (op0))
2024         {
2025           if (code == MINUS_EXPR)
2026             expr = fold_build1 (NEGATE_EXPR, type, op1);
2027           else
2028             expr = op1;
2029         }
2030       else
2031         expr = fold_build2 (code, type, op0, op1);
2032
2033       return fold_convert (orig_type, expr);
2034
2035     case MULT_EXPR:
2036       op1 = TREE_OPERAND (expr, 1);
2037       if (!cst_and_fits_in_hwi (op1))
2038         return orig_expr;
2039
2040       op0 = TREE_OPERAND (expr, 0);
2041       op0 = strip_offset_1 (op0, false, false, &off0);
2042       if (op0 == TREE_OPERAND (expr, 0))
2043         return orig_expr;
2044
2045       *offset = off0 * int_cst_value (op1);
2046       if (integer_zerop (op0))
2047         expr = op0;
2048       else
2049         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2050
2051       return fold_convert (orig_type, expr);
2052
2053     case ARRAY_REF:
2054     case ARRAY_RANGE_REF:
2055       if (!inside_addr)
2056         return orig_expr;
2057
2058       step = array_ref_element_size (expr);
2059       if (!cst_and_fits_in_hwi (step))
2060         break;
2061
2062       st = int_cst_value (step);
2063       op1 = TREE_OPERAND (expr, 1);
2064       op1 = strip_offset_1 (op1, false, false, &off1);
2065       *offset = off1 * st;
2066
2067       if (top_compref
2068           && integer_zerop (op1))
2069         {
2070           /* Strip the component reference completely.  */
2071           op0 = TREE_OPERAND (expr, 0);
2072           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2073           *offset += off0;
2074           return op0;
2075         }
2076       break;
2077
2078     case COMPONENT_REF:
2079       if (!inside_addr)
2080         return orig_expr;
2081
2082       tmp = component_ref_field_offset (expr);
2083       if (top_compref
2084           && cst_and_fits_in_hwi (tmp))
2085         {
2086           /* Strip the component reference completely.  */
2087           op0 = TREE_OPERAND (expr, 0);
2088           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2089           *offset = off0 + int_cst_value (tmp);
2090           return op0;
2091         }
2092       break;
2093
2094     case ADDR_EXPR:
2095       op0 = TREE_OPERAND (expr, 0);
2096       op0 = strip_offset_1 (op0, true, true, &off0);
2097       *offset += off0;
2098
2099       if (op0 == TREE_OPERAND (expr, 0))
2100         return orig_expr;
2101
2102       expr = build_fold_addr_expr (op0);
2103       return fold_convert (orig_type, expr);
2104
2105     case MEM_REF:
2106       /* ???  Offset operand?  */
2107       inside_addr = false;
2108       break;
2109
2110     default:
2111       return orig_expr;
2112     }
2113
2114   /* Default handling of expressions for that we want to recurse into
2115      the first operand.  */
2116   op0 = TREE_OPERAND (expr, 0);
2117   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2118   *offset += off0;
2119
2120   if (op0 == TREE_OPERAND (expr, 0)
2121       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2122     return orig_expr;
2123
2124   expr = copy_node (expr);
2125   TREE_OPERAND (expr, 0) = op0;
2126   if (op1)
2127     TREE_OPERAND (expr, 1) = op1;
2128
2129   /* Inside address, we might strip the top level component references,
2130      thus changing type of the expression.  Handling of ADDR_EXPR
2131      will fix that.  */
2132   expr = fold_convert (orig_type, expr);
2133
2134   return expr;
2135 }
2136
2137 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2138
2139 static tree
2140 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2141 {
2142   return strip_offset_1 (expr, false, false, offset);
2143 }
2144
2145 /* Returns variant of TYPE that can be used as base for different uses.
2146    We return unsigned type with the same precision, which avoids problems
2147    with overflows.  */
2148
2149 static tree
2150 generic_type_for (tree type)
2151 {
2152   if (POINTER_TYPE_P (type))
2153     return unsigned_type_for (type);
2154
2155   if (TYPE_UNSIGNED (type))
2156     return type;
2157
2158   return unsigned_type_for (type);
2159 }
2160
2161 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2162    the bitmap to that we should store it.  */
2163
2164 static struct ivopts_data *fd_ivopts_data;
2165 static tree
2166 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2167 {
2168   bitmap *depends_on = (bitmap *) data;
2169   struct version_info *info;
2170
2171   if (TREE_CODE (*expr_p) != SSA_NAME)
2172     return NULL_TREE;
2173   info = name_info (fd_ivopts_data, *expr_p);
2174
2175   if (!info->inv_id || info->has_nonlin_use)
2176     return NULL_TREE;
2177
2178   if (!*depends_on)
2179     *depends_on = BITMAP_ALLOC (NULL);
2180   bitmap_set_bit (*depends_on, info->inv_id);
2181
2182   return NULL_TREE;
2183 }
2184
2185 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2186    position to POS.  If USE is not NULL, the candidate is set as related to
2187    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2188    replacement of the final value of the iv by a direct computation.  */
2189
2190 static struct iv_cand *
2191 add_candidate_1 (struct ivopts_data *data,
2192                  tree base, tree step, bool important, enum iv_position pos,
2193                  struct iv_use *use, gimple incremented_at)
2194 {
2195   unsigned i;
2196   struct iv_cand *cand = NULL;
2197   tree type, orig_type;
2198
2199   if (base)
2200     {
2201       orig_type = TREE_TYPE (base);
2202       type = generic_type_for (orig_type);
2203       if (type != orig_type)
2204         {
2205           base = fold_convert (type, base);
2206           step = fold_convert (type, step);
2207         }
2208     }
2209
2210   for (i = 0; i < n_iv_cands (data); i++)
2211     {
2212       cand = iv_cand (data, i);
2213
2214       if (cand->pos != pos)
2215         continue;
2216
2217       if (cand->incremented_at != incremented_at
2218           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2219               && cand->ainc_use != use))
2220         continue;
2221
2222       if (!cand->iv)
2223         {
2224           if (!base && !step)
2225             break;
2226
2227           continue;
2228         }
2229
2230       if (!base && !step)
2231         continue;
2232
2233       if (operand_equal_p (base, cand->iv->base, 0)
2234           && operand_equal_p (step, cand->iv->step, 0)
2235           && (TYPE_PRECISION (TREE_TYPE (base))
2236               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2237         break;
2238     }
2239
2240   if (i == n_iv_cands (data))
2241     {
2242       cand = XCNEW (struct iv_cand);
2243       cand->id = i;
2244
2245       if (!base && !step)
2246         cand->iv = NULL;
2247       else
2248         cand->iv = alloc_iv (base, step);
2249
2250       cand->pos = pos;
2251       if (pos != IP_ORIGINAL && cand->iv)
2252         {
2253           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2254           cand->var_after = cand->var_before;
2255         }
2256       cand->important = important;
2257       cand->incremented_at = incremented_at;
2258       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2259
2260       if (step
2261           && TREE_CODE (step) != INTEGER_CST)
2262         {
2263           fd_ivopts_data = data;
2264           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2265         }
2266
2267       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2268         cand->ainc_use = use;
2269       else
2270         cand->ainc_use = NULL;
2271
2272       if (dump_file && (dump_flags & TDF_DETAILS))
2273         dump_cand (dump_file, cand);
2274     }
2275
2276   if (important && !cand->important)
2277     {
2278       cand->important = true;
2279       if (dump_file && (dump_flags & TDF_DETAILS))
2280         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2281     }
2282
2283   if (use)
2284     {
2285       bitmap_set_bit (use->related_cands, i);
2286       if (dump_file && (dump_flags & TDF_DETAILS))
2287         fprintf (dump_file, "Candidate %d is related to use %d\n",
2288                  cand->id, use->id);
2289     }
2290
2291   return cand;
2292 }
2293
2294 /* Returns true if incrementing the induction variable at the end of the LOOP
2295    is allowed.
2296
2297    The purpose is to avoid splitting latch edge with a biv increment, thus
2298    creating a jump, possibly confusing other optimization passes and leaving
2299    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2300    is not available (so we do not have a better alternative), or if the latch
2301    edge is already nonempty.  */
2302
2303 static bool
2304 allow_ip_end_pos_p (struct loop *loop)
2305 {
2306   if (!ip_normal_pos (loop))
2307     return true;
2308
2309   if (!empty_block_p (ip_end_pos (loop)))
2310     return true;
2311
2312   return false;
2313 }
2314
2315 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2316    Important field is set to IMPORTANT.  */
2317
2318 static void
2319 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2320                         bool important, struct iv_use *use)
2321 {
2322   basic_block use_bb = gimple_bb (use->stmt);
2323   enum machine_mode mem_mode;
2324   unsigned HOST_WIDE_INT cstepi;
2325
2326   /* If we insert the increment in any position other than the standard
2327      ones, we must ensure that it is incremented once per iteration.
2328      It must not be in an inner nested loop, or one side of an if
2329      statement.  */
2330   if (use_bb->loop_father != data->current_loop
2331       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2332       || stmt_could_throw_p (use->stmt)
2333       || !cst_and_fits_in_hwi (step))
2334     return;
2335
2336   cstepi = int_cst_value (step);
2337
2338   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2339   if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2340       || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2341     {
2342       enum tree_code code = MINUS_EXPR;
2343       tree new_base;
2344       tree new_step = step;
2345
2346       if (POINTER_TYPE_P (TREE_TYPE (base)))
2347         {
2348           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2349           code = POINTER_PLUS_EXPR;
2350         }
2351       else
2352         new_step = fold_convert (TREE_TYPE (base), new_step);
2353       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2354       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2355                        use->stmt);
2356     }
2357   if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2358       || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2359     {
2360       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2361                        use->stmt);
2362     }
2363 }
2364
2365 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2366    position to POS.  If USE is not NULL, the candidate is set as related to
2367    it.  The candidate computation is scheduled on all available positions.  */
2368
2369 static void
2370 add_candidate (struct ivopts_data *data,
2371                tree base, tree step, bool important, struct iv_use *use)
2372 {
2373   if (ip_normal_pos (data->current_loop))
2374     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2375   if (ip_end_pos (data->current_loop)
2376       && allow_ip_end_pos_p (data->current_loop))
2377     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2378
2379   if (use != NULL && use->type == USE_ADDRESS)
2380     add_autoinc_candidates (data, base, step, important, use);
2381 }
2382
2383 /* Add a standard "0 + 1 * iteration" iv candidate for a
2384    type with SIZE bits.  */
2385
2386 static void
2387 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2388                                      unsigned int size)
2389 {
2390   tree type = lang_hooks.types.type_for_size (size, true);
2391   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2392                  true, NULL);
2393 }
2394
2395 /* Adds standard iv candidates.  */
2396
2397 static void
2398 add_standard_iv_candidates (struct ivopts_data *data)
2399 {
2400   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2401
2402   /* The same for a double-integer type if it is still fast enough.  */
2403   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2404     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2405 }
2406
2407
2408 /* Adds candidates bases on the old induction variable IV.  */
2409
2410 static void
2411 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2412 {
2413   gimple phi;
2414   tree def;
2415   struct iv_cand *cand;
2416
2417   add_candidate (data, iv->base, iv->step, true, NULL);
2418
2419   /* The same, but with initial value zero.  */
2420   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2421     add_candidate (data, size_int (0), iv->step, true, NULL);
2422   else
2423     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2424                    iv->step, true, NULL);
2425
2426   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2427   if (gimple_code (phi) == GIMPLE_PHI)
2428     {
2429       /* Additionally record the possibility of leaving the original iv
2430          untouched.  */
2431       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2432       cand = add_candidate_1 (data,
2433                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2434                               SSA_NAME_DEF_STMT (def));
2435       cand->var_before = iv->ssa_name;
2436       cand->var_after = def;
2437     }
2438 }
2439
2440 /* Adds candidates based on the old induction variables.  */
2441
2442 static void
2443 add_old_ivs_candidates (struct ivopts_data *data)
2444 {
2445   unsigned i;
2446   struct iv *iv;
2447   bitmap_iterator bi;
2448
2449   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2450     {
2451       iv = ver_info (data, i)->iv;
2452       if (iv && iv->biv_p && !integer_zerop (iv->step))
2453         add_old_iv_candidates (data, iv);
2454     }
2455 }
2456
2457 /* Adds candidates based on the value of the induction variable IV and USE.  */
2458
2459 static void
2460 add_iv_value_candidates (struct ivopts_data *data,
2461                          struct iv *iv, struct iv_use *use)
2462 {
2463   unsigned HOST_WIDE_INT offset;
2464   tree base;
2465   tree basetype;
2466
2467   add_candidate (data, iv->base, iv->step, false, use);
2468
2469   /* The same, but with initial value zero.  Make such variable important,
2470      since it is generic enough so that possibly many uses may be based
2471      on it.  */
2472   basetype = TREE_TYPE (iv->base);
2473   if (POINTER_TYPE_P (basetype))
2474     basetype = sizetype;
2475   add_candidate (data, build_int_cst (basetype, 0),
2476                  iv->step, true, use);
2477
2478   /* Third, try removing the constant offset.  Make sure to even
2479      add a candidate for &a[0] vs. (T *)&a.  */
2480   base = strip_offset (iv->base, &offset);
2481   if (offset
2482       || base != iv->base)
2483     add_candidate (data, base, iv->step, false, use);
2484 }
2485
2486 /* Adds candidates based on the uses.  */
2487
2488 static void
2489 add_derived_ivs_candidates (struct ivopts_data *data)
2490 {
2491   unsigned i;
2492
2493   for (i = 0; i < n_iv_uses (data); i++)
2494     {
2495       struct iv_use *use = iv_use (data, i);
2496
2497       if (!use)
2498         continue;
2499
2500       switch (use->type)
2501         {
2502         case USE_NONLINEAR_EXPR:
2503         case USE_COMPARE:
2504         case USE_ADDRESS:
2505           /* Just add the ivs based on the value of the iv used here.  */
2506           add_iv_value_candidates (data, use->iv, use);
2507           break;
2508
2509         default:
2510           gcc_unreachable ();
2511         }
2512     }
2513 }
2514
2515 /* Record important candidates and add them to related_cands bitmaps
2516    if needed.  */
2517
2518 static void
2519 record_important_candidates (struct ivopts_data *data)
2520 {
2521   unsigned i;
2522   struct iv_use *use;
2523
2524   for (i = 0; i < n_iv_cands (data); i++)
2525     {
2526       struct iv_cand *cand = iv_cand (data, i);
2527
2528       if (cand->important)
2529         bitmap_set_bit (data->important_candidates, i);
2530     }
2531
2532   data->consider_all_candidates = (n_iv_cands (data)
2533                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2534
2535   if (data->consider_all_candidates)
2536     {
2537       /* We will not need "related_cands" bitmaps in this case,
2538          so release them to decrease peak memory consumption.  */
2539       for (i = 0; i < n_iv_uses (data); i++)
2540         {
2541           use = iv_use (data, i);
2542           BITMAP_FREE (use->related_cands);
2543         }
2544     }
2545   else
2546     {
2547       /* Add important candidates to the related_cands bitmaps.  */
2548       for (i = 0; i < n_iv_uses (data); i++)
2549         bitmap_ior_into (iv_use (data, i)->related_cands,
2550                          data->important_candidates);
2551     }
2552 }
2553
2554 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2555    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2556    we allocate a simple list to every use.  */
2557
2558 static void
2559 alloc_use_cost_map (struct ivopts_data *data)
2560 {
2561   unsigned i, size, s, j;
2562
2563   for (i = 0; i < n_iv_uses (data); i++)
2564     {
2565       struct iv_use *use = iv_use (data, i);
2566       bitmap_iterator bi;
2567
2568       if (data->consider_all_candidates)
2569         size = n_iv_cands (data);
2570       else
2571         {
2572           s = 0;
2573           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2574             {
2575               s++;
2576             }
2577
2578           /* Round up to the power of two, so that moduling by it is fast.  */
2579           for (size = 1; size < s; size <<= 1)
2580             continue;
2581         }
2582
2583       use->n_map_members = size;
2584       use->cost_map = XCNEWVEC (struct cost_pair, size);
2585     }
2586 }
2587
2588 /* Returns description of computation cost of expression whose runtime
2589    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2590
2591 static comp_cost
2592 new_cost (unsigned runtime, unsigned complexity)
2593 {
2594   comp_cost cost;
2595
2596   cost.cost = runtime;
2597   cost.complexity = complexity;
2598
2599   return cost;
2600 }
2601
2602 /* Adds costs COST1 and COST2.  */
2603
2604 static comp_cost
2605 add_costs (comp_cost cost1, comp_cost cost2)
2606 {
2607   cost1.cost += cost2.cost;
2608   cost1.complexity += cost2.complexity;
2609
2610   return cost1;
2611 }
2612 /* Subtracts costs COST1 and COST2.  */
2613
2614 static comp_cost
2615 sub_costs (comp_cost cost1, comp_cost cost2)
2616 {
2617   cost1.cost -= cost2.cost;
2618   cost1.complexity -= cost2.complexity;
2619
2620   return cost1;
2621 }
2622
2623 /* Returns a negative number if COST1 < COST2, a positive number if
2624    COST1 > COST2, and 0 if COST1 = COST2.  */
2625
2626 static int
2627 compare_costs (comp_cost cost1, comp_cost cost2)
2628 {
2629   if (cost1.cost == cost2.cost)
2630     return cost1.complexity - cost2.complexity;
2631
2632   return cost1.cost - cost2.cost;
2633 }
2634
2635 /* Returns true if COST is infinite.  */
2636
2637 static bool
2638 infinite_cost_p (comp_cost cost)
2639 {
2640   return cost.cost == INFTY;
2641 }
2642
2643 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2644    on invariants DEPENDS_ON and that the value used in expressing it
2645    is VALUE.  */
2646
2647 static void
2648 set_use_iv_cost (struct ivopts_data *data,
2649                  struct iv_use *use, struct iv_cand *cand,
2650                  comp_cost cost, bitmap depends_on, tree value,
2651                  int inv_expr_id)
2652 {
2653   unsigned i, s;
2654
2655   if (infinite_cost_p (cost))
2656     {
2657       BITMAP_FREE (depends_on);
2658       return;
2659     }
2660
2661   if (data->consider_all_candidates)
2662     {
2663       use->cost_map[cand->id].cand = cand;
2664       use->cost_map[cand->id].cost = cost;
2665       use->cost_map[cand->id].depends_on = depends_on;
2666       use->cost_map[cand->id].value = value;
2667       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2668       return;
2669     }
2670
2671   /* n_map_members is a power of two, so this computes modulo.  */
2672   s = cand->id & (use->n_map_members - 1);
2673   for (i = s; i < use->n_map_members; i++)
2674     if (!use->cost_map[i].cand)
2675       goto found;
2676   for (i = 0; i < s; i++)
2677     if (!use->cost_map[i].cand)
2678       goto found;
2679
2680   gcc_unreachable ();
2681
2682 found:
2683   use->cost_map[i].cand = cand;
2684   use->cost_map[i].cost = cost;
2685   use->cost_map[i].depends_on = depends_on;
2686   use->cost_map[i].value = value;
2687   use->cost_map[i].inv_expr_id = inv_expr_id;
2688 }
2689
2690 /* Gets cost of (USE, CANDIDATE) pair.  */
2691
2692 static struct cost_pair *
2693 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2694                  struct iv_cand *cand)
2695 {
2696   unsigned i, s;
2697   struct cost_pair *ret;
2698
2699   if (!cand)
2700     return NULL;
2701
2702   if (data->consider_all_candidates)
2703     {
2704       ret = use->cost_map + cand->id;
2705       if (!ret->cand)
2706         return NULL;
2707
2708       return ret;
2709     }
2710
2711   /* n_map_members is a power of two, so this computes modulo.  */
2712   s = cand->id & (use->n_map_members - 1);
2713   for (i = s; i < use->n_map_members; i++)
2714     if (use->cost_map[i].cand == cand)
2715       return use->cost_map + i;
2716
2717   for (i = 0; i < s; i++)
2718     if (use->cost_map[i].cand == cand)
2719       return use->cost_map + i;
2720
2721   return NULL;
2722 }
2723
2724 /* Returns estimate on cost of computing SEQ.  */
2725
2726 static unsigned
2727 seq_cost (rtx seq, bool speed)
2728 {
2729   unsigned cost = 0;
2730   rtx set;
2731
2732   for (; seq; seq = NEXT_INSN (seq))
2733     {
2734       set = single_set (seq);
2735       if (set)
2736         cost += rtx_cost (set, SET,speed);
2737       else
2738         cost++;
2739     }
2740
2741   return cost;
2742 }
2743
2744 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2745 static rtx
2746 produce_memory_decl_rtl (tree obj, int *regno)
2747 {
2748   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2749   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2750   rtx x;
2751
2752   gcc_assert (obj);
2753   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2754     {
2755       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2756       x = gen_rtx_SYMBOL_REF (address_mode, name);
2757       SET_SYMBOL_REF_DECL (x, obj);
2758       x = gen_rtx_MEM (DECL_MODE (obj), x);
2759       set_mem_addr_space (x, as);
2760       targetm.encode_section_info (obj, x, true);
2761     }
2762   else
2763     {
2764       x = gen_raw_REG (address_mode, (*regno)++);
2765       x = gen_rtx_MEM (DECL_MODE (obj), x);
2766       set_mem_addr_space (x, as);
2767     }
2768
2769   return x;
2770 }
2771
2772 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2773    walk_tree.  DATA contains the actual fake register number.  */
2774
2775 static tree
2776 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2777 {
2778   tree obj = NULL_TREE;
2779   rtx x = NULL_RTX;
2780   int *regno = (int *) data;
2781
2782   switch (TREE_CODE (*expr_p))
2783     {
2784     case ADDR_EXPR:
2785       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2786            handled_component_p (*expr_p);
2787            expr_p = &TREE_OPERAND (*expr_p, 0))
2788         continue;
2789       obj = *expr_p;
2790       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2791         x = produce_memory_decl_rtl (obj, regno);
2792       break;
2793
2794     case SSA_NAME:
2795       *ws = 0;
2796       obj = SSA_NAME_VAR (*expr_p);
2797       if (!DECL_RTL_SET_P (obj))
2798         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2799       break;
2800
2801     case VAR_DECL:
2802     case PARM_DECL:
2803     case RESULT_DECL:
2804       *ws = 0;
2805       obj = *expr_p;
2806
2807       if (DECL_RTL_SET_P (obj))
2808         break;
2809
2810       if (DECL_MODE (obj) == BLKmode)
2811         x = produce_memory_decl_rtl (obj, regno);
2812       else
2813         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2814
2815       break;
2816
2817     default:
2818       break;
2819     }
2820
2821   if (x)
2822     {
2823       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2824       SET_DECL_RTL (obj, x);
2825     }
2826
2827   return NULL_TREE;
2828 }
2829
2830 /* Determines cost of the computation of EXPR.  */
2831
2832 static unsigned
2833 computation_cost (tree expr, bool speed)
2834 {
2835   rtx seq, rslt;
2836   tree type = TREE_TYPE (expr);
2837   unsigned cost;
2838   /* Avoid using hard regs in ways which may be unsupported.  */
2839   int regno = LAST_VIRTUAL_REGISTER + 1;
2840   struct cgraph_node *node = cgraph_node (current_function_decl);
2841   enum node_frequency real_frequency = node->frequency;
2842
2843   node->frequency = NODE_FREQUENCY_NORMAL;
2844   crtl->maybe_hot_insn_p = speed;
2845   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2846   start_sequence ();
2847   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2848   seq = get_insns ();
2849   end_sequence ();
2850   default_rtl_profile ();
2851   node->frequency = real_frequency;
2852
2853   cost = seq_cost (seq, speed);
2854   if (MEM_P (rslt))
2855     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2856                           TYPE_ADDR_SPACE (type), speed);
2857
2858   return cost;
2859 }
2860
2861 /* Returns variable containing the value of candidate CAND at statement AT.  */
2862
2863 static tree
2864 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2865 {
2866   if (stmt_after_increment (loop, cand, stmt))
2867     return cand->var_after;
2868   else
2869     return cand->var_before;
2870 }
2871
2872 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2873    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2874
2875 int
2876 tree_int_cst_sign_bit (const_tree t)
2877 {
2878   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2879   unsigned HOST_WIDE_INT w;
2880
2881   if (bitno < HOST_BITS_PER_WIDE_INT)
2882     w = TREE_INT_CST_LOW (t);
2883   else
2884     {
2885       w = TREE_INT_CST_HIGH (t);
2886       bitno -= HOST_BITS_PER_WIDE_INT;
2887     }
2888
2889   return (w >> bitno) & 1;
2890 }
2891
2892 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2893    same precision that is at least as wide as the precision of TYPE, stores
2894    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2895    type of A and B.  */
2896
2897 static tree
2898 determine_common_wider_type (tree *a, tree *b)
2899 {
2900   tree wider_type = NULL;
2901   tree suba, subb;
2902   tree atype = TREE_TYPE (*a);
2903
2904   if (CONVERT_EXPR_P (*a))
2905     {
2906       suba = TREE_OPERAND (*a, 0);
2907       wider_type = TREE_TYPE (suba);
2908       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2909         return atype;
2910     }
2911   else
2912     return atype;
2913
2914   if (CONVERT_EXPR_P (*b))
2915     {
2916       subb = TREE_OPERAND (*b, 0);
2917       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2918         return atype;
2919     }
2920   else
2921     return atype;
2922
2923   *a = suba;
2924   *b = subb;
2925   return wider_type;
2926 }
2927
2928 /* Determines the expression by that USE is expressed from induction variable
2929    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2930    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2931
2932 static bool
2933 get_computation_aff (struct loop *loop,
2934                      struct iv_use *use, struct iv_cand *cand, gimple at,
2935                      struct affine_tree_combination *aff)
2936 {
2937   tree ubase = use->iv->base;
2938   tree ustep = use->iv->step;
2939   tree cbase = cand->iv->base;
2940   tree cstep = cand->iv->step, cstep_common;
2941   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2942   tree common_type, var;
2943   tree uutype;
2944   aff_tree cbase_aff, var_aff;
2945   double_int rat;
2946
2947   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2948     {
2949       /* We do not have a precision to express the values of use.  */
2950       return false;
2951     }
2952
2953   var = var_at_stmt (loop, cand, at);
2954   uutype = unsigned_type_for (utype);
2955
2956   /* If the conversion is not noop, perform it.  */
2957   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2958     {
2959       cstep = fold_convert (uutype, cstep);
2960       cbase = fold_convert (uutype, cbase);
2961       var = fold_convert (uutype, var);
2962     }
2963
2964   if (!constant_multiple_of (ustep, cstep, &rat))
2965     return false;
2966
2967   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2968      type, we achieve better folding by computing their difference in this
2969      wider type, and cast the result to UUTYPE.  We do not need to worry about
2970      overflows, as all the arithmetics will in the end be performed in UUTYPE
2971      anyway.  */
2972   common_type = determine_common_wider_type (&ubase, &cbase);
2973
2974   /* use = ubase - ratio * cbase + ratio * var.  */
2975   tree_to_aff_combination (ubase, common_type, aff);
2976   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2977   tree_to_aff_combination (var, uutype, &var_aff);
2978
2979   /* We need to shift the value if we are after the increment.  */
2980   if (stmt_after_increment (loop, cand, at))
2981     {
2982       aff_tree cstep_aff;
2983
2984       if (common_type != uutype)
2985         cstep_common = fold_convert (common_type, cstep);
2986       else
2987         cstep_common = cstep;
2988
2989       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2990       aff_combination_add (&cbase_aff, &cstep_aff);
2991     }
2992
2993   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2994   aff_combination_add (aff, &cbase_aff);
2995   if (common_type != uutype)
2996     aff_combination_convert (aff, uutype);
2997
2998   aff_combination_scale (&var_aff, rat);
2999   aff_combination_add (aff, &var_aff);
3000
3001   return true;
3002 }
3003
3004 /* Determines the expression by that USE is expressed from induction variable
3005    CAND at statement AT in LOOP.  The computation is unshared.  */
3006
3007 static tree
3008 get_computation_at (struct loop *loop,
3009                     struct iv_use *use, struct iv_cand *cand, gimple at)
3010 {
3011   aff_tree aff;
3012   tree type = TREE_TYPE (use->iv->base);
3013
3014   if (!get_computation_aff (loop, use, cand, at, &aff))
3015     return NULL_TREE;
3016   unshare_aff_combination (&aff);
3017   return fold_convert (type, aff_combination_to_tree (&aff));
3018 }
3019
3020 /* Determines the expression by that USE is expressed from induction variable
3021    CAND in LOOP.  The computation is unshared.  */
3022
3023 static tree
3024 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3025 {
3026   return get_computation_at (loop, use, cand, use->stmt);
3027 }
3028
3029 /* Adjust the cost COST for being in loop setup rather than loop body.
3030    If we're optimizing for space, the loop setup overhead is constant;
3031    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3032 static unsigned
3033 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3034 {
3035   if (cost == INFTY)
3036     return cost;
3037   else if (optimize_loop_for_speed_p (data->current_loop))
3038     return cost / avg_loop_niter (data->current_loop);
3039   else
3040     return cost;
3041 }
3042
3043 /* Returns cost of addition in MODE.  */
3044
3045 static unsigned
3046 add_cost (enum machine_mode mode, bool speed)
3047 {
3048   static unsigned costs[NUM_MACHINE_MODES];
3049   rtx seq;
3050   unsigned cost;
3051
3052   if (costs[mode])
3053     return costs[mode];
3054
3055   start_sequence ();
3056   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3057                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3058                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3059                  NULL_RTX);
3060   seq = get_insns ();
3061   end_sequence ();
3062
3063   cost = seq_cost (seq, speed);
3064   if (!cost)
3065     cost = 1;
3066
3067   costs[mode] = cost;
3068
3069   if (dump_file && (dump_flags & TDF_DETAILS))
3070     fprintf (dump_file, "Addition in %s costs %d\n",
3071              GET_MODE_NAME (mode), cost);
3072   return cost;
3073 }
3074
3075 /* Entry in a hashtable of already known costs for multiplication.  */
3076 struct mbc_entry
3077 {
3078   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3079   enum machine_mode mode;       /* In mode.  */
3080   unsigned cost;                /* The cost.  */
3081 };
3082
3083 /* Counts hash value for the ENTRY.  */
3084
3085 static hashval_t
3086 mbc_entry_hash (const void *entry)
3087 {
3088   const struct mbc_entry *e = (const struct mbc_entry *) entry;
3089
3090   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3091 }
3092
3093 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3094
3095 static int
3096 mbc_entry_eq (const void *entry1, const void *entry2)
3097 {
3098   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3099   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3100
3101   return (e1->mode == e2->mode
3102           && e1->cst == e2->cst);
3103 }
3104
3105 /* Returns cost of multiplication by constant CST in MODE.  */
3106
3107 unsigned
3108 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3109 {
3110   static htab_t costs;
3111   struct mbc_entry **cached, act;
3112   rtx seq;
3113   unsigned cost;
3114
3115   if (!costs)
3116     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3117
3118   act.mode = mode;
3119   act.cst = cst;
3120   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3121   if (*cached)
3122     return (*cached)->cost;
3123
3124   *cached = XNEW (struct mbc_entry);
3125   (*cached)->mode = mode;
3126   (*cached)->cst = cst;
3127
3128   start_sequence ();
3129   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3130                gen_int_mode (cst, mode), NULL_RTX, 0);
3131   seq = get_insns ();
3132   end_sequence ();
3133
3134   cost = seq_cost (seq, speed);
3135
3136   if (dump_file && (dump_flags & TDF_DETAILS))
3137     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3138              (int) cst, GET_MODE_NAME (mode), cost);
3139
3140   (*cached)->cost = cost;
3141
3142   return cost;
3143 }
3144
3145 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3146    validity for a memory reference accessing memory of mode MODE in
3147    address space AS.  */
3148
3149 DEF_VEC_P (sbitmap);
3150 DEF_VEC_ALLOC_P (sbitmap, heap);
3151
3152 bool
3153 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3154                                  addr_space_t as)
3155 {
3156 #define MAX_RATIO 128
3157   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3158   static VEC (sbitmap, heap) *valid_mult_list;
3159   sbitmap valid_mult;
3160
3161   if (data_index >= VEC_length (sbitmap, valid_mult_list))
3162     VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3163
3164   valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3165   if (!valid_mult)
3166     {
3167       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3168       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3169       rtx addr;
3170       HOST_WIDE_INT i;
3171
3172       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3173       sbitmap_zero (valid_mult);
3174       addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3175       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3176         {
3177           XEXP (addr, 1) = gen_int_mode (i, address_mode);
3178           if (memory_address_addr_space_p (mode, addr, as))
3179             SET_BIT (valid_mult, i + MAX_RATIO);
3180         }
3181
3182       if (dump_file && (dump_flags & TDF_DETAILS))
3183         {
3184           fprintf (dump_file, "  allowed multipliers:");
3185           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3186             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3187               fprintf (dump_file, " %d", (int) i);
3188           fprintf (dump_file, "\n");
3189           fprintf (dump_file, "\n");
3190         }
3191
3192       VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3193     }
3194
3195   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3196     return false;
3197
3198   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3199 }
3200
3201 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3202    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3203    variable is omitted.  Compute the cost for a memory reference that accesses
3204    a memory location of mode MEM_MODE in address space AS.
3205
3206    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3207    size of MEM_MODE / RATIO) is available.  To make this determination, we
3208    look at the size of the increment to be made, which is given in CSTEP.
3209    CSTEP may be zero if the step is unknown.
3210    STMT_AFTER_INC is true iff the statement we're looking at is after the
3211    increment of the original biv.
3212
3213    TODO -- there must be some better way.  This all is quite crude.  */
3214
3215 typedef struct
3216 {
3217   HOST_WIDE_INT min_offset, max_offset;
3218   unsigned costs[2][2][2][2];
3219 } *address_cost_data;
3220
3221 DEF_VEC_P (address_cost_data);
3222 DEF_VEC_ALLOC_P (address_cost_data, heap);
3223
3224 static comp_cost
3225 get_address_cost (bool symbol_present, bool var_present,
3226                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3227                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3228                   addr_space_t as, bool speed,
3229                   bool stmt_after_inc, bool *may_autoinc)
3230 {
3231   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3232   static VEC(address_cost_data, heap) *address_cost_data_list;
3233   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3234   address_cost_data data;
3235   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3236   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3237   unsigned cost, acost, complexity;
3238   bool offset_p, ratio_p, autoinc;
3239   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3240   unsigned HOST_WIDE_INT mask;
3241   unsigned bits;
3242
3243   if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3244     VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3245                            data_index + 1);
3246
3247   data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3248   if (!data)
3249     {
3250       HOST_WIDE_INT i;
3251       HOST_WIDE_INT rat, off = 0;
3252       int old_cse_not_expected, width;
3253       unsigned sym_p, var_p, off_p, rat_p, add_c;
3254       rtx seq, addr, base;
3255       rtx reg0, reg1;
3256
3257       data = (address_cost_data) xcalloc (1, sizeof (*data));
3258
3259       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3260
3261       width = GET_MODE_BITSIZE (address_mode) - 1;
3262       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3263         width = HOST_BITS_PER_WIDE_INT - 1;
3264       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3265
3266       for (i = width; i >= 0; i--)
3267         {
3268           off = -((HOST_WIDE_INT) 1 << i);
3269           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3270           if (memory_address_addr_space_p (mem_mode, addr, as))
3271             break;
3272         }
3273       data->min_offset = (i == -1? 0 : off);
3274
3275       for (i = width; i >= 0; i--)
3276         {
3277           off = ((HOST_WIDE_INT) 1 << i) - 1;
3278           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3279           if (memory_address_addr_space_p (mem_mode, addr, as))
3280             break;
3281         }
3282       if (i == -1)
3283         off = 0;
3284       data->max_offset = off;
3285
3286       if (dump_file && (dump_flags & TDF_DETAILS))
3287         {
3288           fprintf (dump_file, "get_address_cost:\n");
3289           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3290                    GET_MODE_NAME (mem_mode),
3291                    data->min_offset);
3292           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3293                    GET_MODE_NAME (mem_mode),
3294                    data->max_offset);
3295         }
3296
3297       rat = 1;
3298       for (i = 2; i <= MAX_RATIO; i++)
3299         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3300           {
3301             rat = i;
3302             break;
3303           }
3304
3305       /* Compute the cost of various addressing modes.  */
3306       acost = 0;
3307       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3308       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3309
3310       if (HAVE_PRE_DECREMENT)
3311         {
3312           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3313           has_predec[mem_mode]
3314             = memory_address_addr_space_p (mem_mode, addr, as);
3315         }
3316       if (HAVE_POST_DECREMENT)
3317         {
3318           addr = gen_rtx_POST_DEC (address_mode, reg0);
3319           has_postdec[mem_mode]
3320             = memory_address_addr_space_p (mem_mode, addr, as);
3321         }
3322       if (HAVE_PRE_INCREMENT)
3323         {
3324           addr = gen_rtx_PRE_INC (address_mode, reg0);
3325           has_preinc[mem_mode]
3326             = memory_address_addr_space_p (mem_mode, addr, as);
3327         }
3328       if (HAVE_POST_INCREMENT)
3329         {
3330           addr = gen_rtx_POST_INC (address_mode, reg0);
3331           has_postinc[mem_mode]
3332             = memory_address_addr_space_p (mem_mode, addr, as);
3333         }
3334       for (i = 0; i < 16; i++)
3335         {
3336           sym_p = i & 1;
3337           var_p = (i >> 1) & 1;
3338           off_p = (i >> 2) & 1;
3339           rat_p = (i >> 3) & 1;
3340
3341           addr = reg0;
3342           if (rat_p)
3343             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3344                                    gen_int_mode (rat, address_mode));
3345
3346           if (var_p)
3347             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3348
3349           if (sym_p)
3350             {
3351               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3352               /* ??? We can run into trouble with some backends by presenting
3353                  it with symbols which haven't been properly passed through
3354                  targetm.encode_section_info.  By setting the local bit, we
3355                  enhance the probability of things working.  */
3356               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3357
3358               if (off_p)
3359                 base = gen_rtx_fmt_e (CONST, address_mode,
3360                                       gen_rtx_fmt_ee
3361                                         (PLUS, address_mode, base,
3362                                          gen_int_mode (off, address_mode)));
3363             }
3364           else if (off_p)
3365             base = gen_int_mode (off, address_mode);
3366           else
3367             base = NULL_RTX;
3368
3369           if (base)
3370             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3371
3372           start_sequence ();
3373           /* To avoid splitting addressing modes, pretend that no cse will
3374              follow.  */
3375           old_cse_not_expected = cse_not_expected;
3376           cse_not_expected = true;
3377           addr = memory_address_addr_space (mem_mode, addr, as);
3378           cse_not_expected = old_cse_not_expected;
3379           seq = get_insns ();
3380           end_sequence ();
3381
3382           acost = seq_cost (seq, speed);
3383           acost += address_cost (addr, mem_mode, as, speed);
3384
3385           if (!acost)
3386             acost = 1;
3387           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3388         }
3389
3390       /* On some targets, it is quite expensive to load symbol to a register,
3391          which makes addresses that contain symbols look much more expensive.
3392          However, the symbol will have to be loaded in any case before the
3393          loop (and quite likely we have it in register already), so it does not
3394          make much sense to penalize them too heavily.  So make some final
3395          tweaks for the SYMBOL_PRESENT modes:
3396
3397          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3398          var is cheaper, use this mode with small penalty.
3399          If VAR_PRESENT is true, try whether the mode with
3400          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3401          if this is the case, use it.  */
3402       add_c = add_cost (address_mode, speed);
3403       for (i = 0; i < 8; i++)
3404         {
3405           var_p = i & 1;
3406           off_p = (i >> 1) & 1;
3407           rat_p = (i >> 2) & 1;
3408
3409           acost = data->costs[0][1][off_p][rat_p] + 1;
3410           if (var_p)
3411             acost += add_c;
3412
3413           if (acost < data->costs[1][var_p][off_p][rat_p])
3414             data->costs[1][var_p][off_p][rat_p] = acost;
3415         }
3416
3417       if (dump_file && (dump_flags & TDF_DETAILS))
3418         {
3419           fprintf (dump_file, "Address costs:\n");
3420
3421           for (i = 0; i < 16; i++)
3422             {
3423               sym_p = i & 1;
3424               var_p = (i >> 1) & 1;
3425               off_p = (i >> 2) & 1;
3426               rat_p = (i >> 3) & 1;
3427
3428               fprintf (dump_file, "  ");
3429               if (sym_p)
3430                 fprintf (dump_file, "sym + ");
3431               if (var_p)
3432                 fprintf (dump_file, "var + ");
3433               if (off_p)
3434                 fprintf (dump_file, "cst + ");
3435               if (rat_p)
3436                 fprintf (dump_file, "rat * ");
3437
3438               acost = data->costs[sym_p][var_p][off_p][rat_p];
3439               fprintf (dump_file, "index costs %d\n", acost);
3440             }
3441           if (has_predec[mem_mode] || has_postdec[mem_mode]
3442               || has_preinc[mem_mode] || has_postinc[mem_mode])
3443             fprintf (dump_file, "  May include autoinc/dec\n");
3444           fprintf (dump_file, "\n");
3445         }
3446
3447       VEC_replace (address_cost_data, address_cost_data_list,
3448                    data_index, data);
3449     }
3450
3451   bits = GET_MODE_BITSIZE (address_mode);
3452   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3453   offset &= mask;
3454   if ((offset >> (bits - 1) & 1))
3455     offset |= ~mask;
3456   s_offset = offset;
3457
3458   autoinc = false;
3459   msize = GET_MODE_SIZE (mem_mode);
3460   autoinc_offset = offset;
3461   if (stmt_after_inc)
3462     autoinc_offset += ratio * cstep;
3463   if (symbol_present || var_present || ratio != 1)
3464     autoinc = false;
3465   else if ((has_postinc[mem_mode] && autoinc_offset == 0
3466                && msize == cstep)
3467            || (has_postdec[mem_mode] && autoinc_offset == 0
3468                && msize == -cstep)
3469            || (has_preinc[mem_mode] && autoinc_offset == msize
3470                && msize == cstep)
3471            || (has_predec[mem_mode] && autoinc_offset == -msize
3472                && msize == -cstep))
3473     autoinc = true;
3474
3475   cost = 0;
3476   offset_p = (s_offset != 0
3477               && data->min_offset <= s_offset
3478               && s_offset <= data->max_offset);
3479   ratio_p = (ratio != 1
3480              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3481
3482   if (ratio != 1 && !ratio_p)
3483     cost += multiply_by_cost (ratio, address_mode, speed);
3484
3485   if (s_offset && !offset_p && !symbol_present)
3486     cost += add_cost (address_mode, speed);
3487
3488   if (may_autoinc)
3489     *may_autoinc = autoinc;
3490   acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3491   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3492   return new_cost (cost + acost, complexity);
3493 }
3494
3495 /* Estimates cost of forcing expression EXPR into a variable.  */
3496
3497 static comp_cost
3498 force_expr_to_var_cost (tree expr, bool speed)
3499 {
3500   static bool costs_initialized = false;
3501   static unsigned integer_cost [2];
3502   static unsigned symbol_cost [2];
3503   static unsigned address_cost [2];
3504   tree op0, op1;
3505   comp_cost cost0, cost1, cost;
3506   enum machine_mode mode;
3507
3508   if (!costs_initialized)
3509     {
3510       tree type = build_pointer_type (integer_type_node);
3511       tree var, addr;
3512       rtx x;
3513       int i;
3514
3515       var = create_tmp_var_raw (integer_type_node, "test_var");
3516       TREE_STATIC (var) = 1;
3517       x = produce_memory_decl_rtl (var, NULL);
3518       SET_DECL_RTL (var, x);
3519
3520       addr = build1 (ADDR_EXPR, type, var);
3521
3522
3523       for (i = 0; i < 2; i++)
3524         {
3525           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3526                                                              2000), i);
3527
3528           symbol_cost[i] = computation_cost (addr, i) + 1;
3529
3530           address_cost[i]
3531             = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3532                                         addr,
3533                                         build_int_cst (sizetype, 2000)), i) + 1;
3534           if (dump_file && (dump_flags & TDF_DETAILS))
3535             {
3536               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3537               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3538               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3539               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3540               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3541               fprintf (dump_file, "\n");
3542             }
3543         }
3544
3545       costs_initialized = true;
3546     }
3547
3548   STRIP_NOPS (expr);
3549
3550   if (SSA_VAR_P (expr))
3551     return zero_cost;
3552
3553   if (is_gimple_min_invariant (expr))
3554     {
3555       if (TREE_CODE (expr) == INTEGER_CST)
3556         return new_cost (integer_cost [speed], 0);
3557
3558       if (TREE_CODE (expr) == ADDR_EXPR)
3559         {
3560           tree obj = TREE_OPERAND (expr, 0);
3561
3562           if (TREE_CODE (obj) == VAR_DECL
3563               || TREE_CODE (obj) == PARM_DECL
3564               || TREE_CODE (obj) == RESULT_DECL)
3565             return new_cost (symbol_cost [speed], 0);
3566         }
3567
3568       return new_cost (address_cost [speed], 0);
3569     }
3570
3571   switch (TREE_CODE (expr))
3572     {
3573     case POINTER_PLUS_EXPR:
3574     case PLUS_EXPR:
3575     case MINUS_EXPR:
3576     case MULT_EXPR:
3577       op0 = TREE_OPERAND (expr, 0);
3578       op1 = TREE_OPERAND (expr, 1);
3579       STRIP_NOPS (op0);
3580       STRIP_NOPS (op1);
3581
3582       if (is_gimple_val (op0))
3583         cost0 = zero_cost;
3584       else
3585         cost0 = force_expr_to_var_cost (op0, speed);
3586
3587       if (is_gimple_val (op1))
3588         cost1 = zero_cost;
3589       else
3590         cost1 = force_expr_to_var_cost (op1, speed);
3591
3592       break;
3593
3594     case NEGATE_EXPR:
3595       op0 = TREE_OPERAND (expr, 0);
3596       STRIP_NOPS (op0);
3597       op1 = NULL_TREE;
3598
3599       if (is_gimple_val (op0))
3600         cost0 = zero_cost;
3601       else
3602         cost0 = force_expr_to_var_cost (op0, speed);
3603
3604       cost1 = zero_cost;
3605       break;
3606
3607     default:
3608       /* Just an arbitrary value, FIXME.  */
3609       return new_cost (target_spill_cost[speed], 0);
3610     }
3611
3612   mode = TYPE_MODE (TREE_TYPE (expr));
3613   switch (TREE_CODE (expr))
3614     {
3615     case POINTER_PLUS_EXPR:
3616     case PLUS_EXPR:
3617     case MINUS_EXPR:
3618     case NEGATE_EXPR:
3619       cost = new_cost (add_cost (mode, speed), 0);
3620       break;
3621
3622     case MULT_EXPR:
3623       if (cst_and_fits_in_hwi (op0))
3624         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3625       else if (cst_and_fits_in_hwi (op1))
3626         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3627       else
3628         return new_cost (target_spill_cost [speed], 0);
3629       break;
3630
3631     default:
3632       gcc_unreachable ();
3633     }
3634
3635   cost = add_costs (cost, cost0);
3636   cost = add_costs (cost, cost1);
3637
3638   /* Bound the cost by target_spill_cost.  The parts of complicated
3639      computations often are either loop invariant or at least can
3640      be shared between several iv uses, so letting this grow without
3641      limits would not give reasonable results.  */
3642   if (cost.cost > (int) target_spill_cost [speed])
3643     cost.cost = target_spill_cost [speed];
3644
3645   return cost;
3646 }
3647
3648 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3649    invariants the computation depends on.  */
3650
3651 static comp_cost
3652 force_var_cost (struct ivopts_data *data,
3653                 tree expr, bitmap *depends_on)
3654 {
3655   if (depends_on)
3656     {
3657       fd_ivopts_data = data;
3658       walk_tree (&expr, find_depends, depends_on, NULL);
3659     }
3660
3661   return force_expr_to_var_cost (expr, data->speed);
3662 }
3663
3664 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3665    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3666    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3667    invariants the computation depends on.  */
3668
3669 static comp_cost
3670 split_address_cost (struct ivopts_data *data,
3671                     tree addr, bool *symbol_present, bool *var_present,
3672                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3673 {
3674   tree core;
3675   HOST_WIDE_INT bitsize;
3676   HOST_WIDE_INT bitpos;
3677   tree toffset;
3678   enum machine_mode mode;
3679   int unsignedp, volatilep;
3680
3681   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3682                               &unsignedp, &volatilep, false);
3683
3684   if (toffset != 0
3685       || bitpos % BITS_PER_UNIT != 0
3686       || TREE_CODE (core) != VAR_DECL)
3687     {
3688       *symbol_present = false;
3689       *var_present = true;
3690       fd_ivopts_data = data;
3691       walk_tree (&addr, find_depends, depends_on, NULL);
3692       return new_cost (target_spill_cost[data->speed], 0);
3693     }
3694
3695   *offset += bitpos / BITS_PER_UNIT;
3696   if (TREE_STATIC (core)
3697       || DECL_EXTERNAL (core))
3698     {
3699       *symbol_present = true;
3700       *var_present = false;
3701       return zero_cost;
3702     }
3703
3704   *symbol_present = false;
3705   *var_present = true;
3706   return zero_cost;
3707 }
3708
3709 /* Estimates cost of expressing difference of addresses E1 - E2 as
3710    var + symbol + offset.  The value of offset is added to OFFSET,
3711    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3712    part is missing.  DEPENDS_ON is a set of the invariants the computation
3713    depends on.  */
3714
3715 static comp_cost
3716 ptr_difference_cost (struct ivopts_data *data,
3717                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3718                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3719 {
3720   HOST_WIDE_INT diff = 0;
3721   aff_tree aff_e1, aff_e2;
3722   tree type;
3723
3724   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3725
3726   if (ptr_difference_const (e1, e2, &diff))
3727     {
3728       *offset += diff;
3729       *symbol_present = false;
3730       *var_present = false;
3731       return zero_cost;
3732     }
3733
3734   if (integer_zerop (e2))
3735     return split_address_cost (data, TREE_OPERAND (e1, 0),
3736                                symbol_present, var_present, offset, depends_on);
3737
3738   *symbol_present = false;
3739   *var_present = true;
3740
3741   type = signed_type_for (TREE_TYPE (e1));
3742   tree_to_aff_combination (e1, type, &aff_e1);
3743   tree_to_aff_combination (e2, type, &aff_e2);
3744   aff_combination_scale (&aff_e2, double_int_minus_one);
3745   aff_combination_add (&aff_e1, &aff_e2);
3746
3747   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3748 }
3749
3750 /* Estimates cost of expressing difference E1 - E2 as
3751    var + symbol + offset.  The value of offset is added to OFFSET,
3752    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3753    part is missing.  DEPENDS_ON is a set of the invariants the computation
3754    depends on.  */
3755
3756 static comp_cost
3757 difference_cost (struct ivopts_data *data,
3758                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3759                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3760 {
3761   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3762   unsigned HOST_WIDE_INT off1, off2;
3763   aff_tree aff_e1, aff_e2;
3764   tree type;
3765
3766   e1 = strip_offset (e1, &off1);
3767   e2 = strip_offset (e2, &off2);
3768   *offset += off1 - off2;
3769
3770   STRIP_NOPS (e1);
3771   STRIP_NOPS (e2);
3772
3773   if (TREE_CODE (e1) == ADDR_EXPR)
3774     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3775                                 offset, depends_on);
3776   *symbol_present = false;
3777
3778   if (operand_equal_p (e1, e2, 0))
3779     {
3780       *var_present = false;
3781       return zero_cost;
3782     }
3783
3784   *var_present = true;
3785
3786   if (integer_zerop (e2))
3787     return force_var_cost (data, e1, depends_on);
3788
3789   if (integer_zerop (e1))
3790     {
3791       comp_cost cost = force_var_cost (data, e2, depends_on);
3792       cost.cost += multiply_by_cost (-1, mode, data->speed);
3793       return cost;
3794     }
3795
3796   type = signed_type_for (TREE_TYPE (e1));
3797   tree_to_aff_combination (e1, type, &aff_e1);
3798   tree_to_aff_combination (e2, type, &aff_e2);
3799   aff_combination_scale (&aff_e2, double_int_minus_one);
3800   aff_combination_add (&aff_e1, &aff_e2);
3801
3802   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3803 }
3804
3805 /* Returns true if AFF1 and AFF2 are identical.  */
3806
3807 static bool
3808 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3809 {
3810   unsigned i;
3811
3812   if (aff1->n != aff2->n)
3813     return false;
3814
3815   for (i = 0; i < aff1->n; i++)
3816     {
3817       if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3818         return false;
3819
3820       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3821         return false;
3822     }
3823   return true;
3824 }
3825
3826 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3827    requires a new compiler generated temporary.  Returns -1 otherwise.
3828    ADDRESS_P is a flag indicating if the expression is for address
3829    computation.  */
3830
3831 static int
3832 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3833                             tree cbase, HOST_WIDE_INT ratio,
3834                             bool address_p)
3835 {
3836   aff_tree ubase_aff, cbase_aff;
3837   tree expr, ub, cb;
3838   struct iv_inv_expr_ent ent;
3839   struct iv_inv_expr_ent **slot;
3840
3841   STRIP_NOPS (ubase);
3842   STRIP_NOPS (cbase);
3843   ub = ubase;
3844   cb = cbase;
3845
3846   if ((TREE_CODE (ubase) == INTEGER_CST)
3847       && (TREE_CODE (cbase) == INTEGER_CST))
3848     return -1;
3849
3850   /* Strips the constant part. */
3851   if (TREE_CODE (ubase) == PLUS_EXPR
3852       || TREE_CODE (ubase) == MINUS_EXPR
3853       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3854     {
3855       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3856         ubase = TREE_OPERAND (ubase, 0);
3857     }
3858
3859   /* Strips the constant part. */
3860   if (TREE_CODE (cbase) == PLUS_EXPR
3861       || TREE_CODE (cbase) == MINUS_EXPR
3862       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3863     {
3864       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3865         cbase = TREE_OPERAND (cbase, 0);
3866     }
3867
3868   if (address_p)
3869     {
3870       if (((TREE_CODE (ubase) == SSA_NAME)
3871            || (TREE_CODE (ubase) == ADDR_EXPR
3872                && is_gimple_min_invariant (ubase)))
3873           && (TREE_CODE (cbase) == INTEGER_CST))
3874         return -1;
3875
3876       if (((TREE_CODE (cbase) == SSA_NAME)
3877            || (TREE_CODE (cbase) == ADDR_EXPR
3878                && is_gimple_min_invariant (cbase)))
3879           && (TREE_CODE (ubase) == INTEGER_CST))
3880         return -1;
3881     }
3882
3883   if (ratio == 1)
3884     {
3885       if(operand_equal_p (ubase, cbase, 0))
3886         return -1;
3887
3888       if (TREE_CODE (ubase) == ADDR_EXPR
3889           && TREE_CODE (cbase) == ADDR_EXPR)
3890         {
3891           tree usym, csym;
3892
3893           usym = TREE_OPERAND (ubase, 0);
3894           csym = TREE_OPERAND (cbase, 0);
3895           if (TREE_CODE (usym) == ARRAY_REF)
3896             {
3897               tree ind = TREE_OPERAND (usym, 1);
3898               if (TREE_CODE (ind) == INTEGER_CST
3899                   && host_integerp (ind, 0)
3900                   && TREE_INT_CST_LOW (ind) == 0)
3901                 usym = TREE_OPERAND (usym, 0);
3902             }
3903           if (TREE_CODE (csym) == ARRAY_REF)
3904             {
3905               tree ind = TREE_OPERAND (csym, 1);
3906               if (TREE_CODE (ind) == INTEGER_CST
3907                   && host_integerp (ind, 0)
3908                   && TREE_INT_CST_LOW (ind) == 0)
3909                 csym = TREE_OPERAND (csym, 0);
3910             }
3911           if (operand_equal_p (usym, csym, 0))
3912             return -1;
3913         }
3914       /* Now do more complex comparison  */
3915       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3916       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3917       if (compare_aff_trees (&ubase_aff, &cbase_aff))
3918         return -1;
3919     }
3920
3921   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3922   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3923
3924   aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3925   aff_combination_add (&ubase_aff, &cbase_aff);
3926   expr = aff_combination_to_tree (&ubase_aff);
3927   ent.expr = expr;
3928   ent.hash = iterative_hash_expr (expr, 0);
3929   slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3930                                                      &ent, INSERT);
3931   if (*slot)
3932     return (*slot)->id;
3933
3934   *slot = XNEW (struct iv_inv_expr_ent);
3935   (*slot)->expr = expr;
3936   (*slot)->hash = ent.hash;
3937   (*slot)->id = data->inv_expr_id++;
3938   return  (*slot)->id;
3939 }
3940
3941
3942
3943 /* Determines the cost of the computation by that USE is expressed
3944    from induction variable CAND.  If ADDRESS_P is true, we just need
3945    to create an address from it, otherwise we want to get it into
3946    register.  A set of invariants we depend on is stored in
3947    DEPENDS_ON.  AT is the statement at that the value is computed.
3948    If CAN_AUTOINC is nonnull, use it to record whether autoinc
3949    addressing is likely.  */
3950
3951 static comp_cost
3952 get_computation_cost_at (struct ivopts_data *data,
3953                          struct iv_use *use, struct iv_cand *cand,
3954                          bool address_p, bitmap *depends_on, gimple at,
3955                          bool *can_autoinc,
3956                          int *inv_expr_id)
3957 {
3958   tree ubase = use->iv->base, ustep = use->iv->step;
3959   tree cbase, cstep;
3960   tree utype = TREE_TYPE (ubase), ctype;
3961   unsigned HOST_WIDE_INT cstepi, offset = 0;
3962   HOST_WIDE_INT ratio, aratio;
3963   bool var_present, symbol_present, stmt_is_after_inc;
3964   comp_cost cost;
3965   double_int rat;
3966   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3967
3968   *depends_on = NULL;
3969
3970   /* Only consider real candidates.  */
3971   if (!cand->iv)
3972     return infinite_cost;
3973
3974   cbase = cand->iv->base;
3975   cstep = cand->iv->step;
3976   ctype = TREE_TYPE (cbase);
3977
3978   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3979     {
3980       /* We do not have a precision to express the values of use.  */
3981       return infinite_cost;
3982     }
3983
3984   if (address_p)
3985     {
3986       /* Do not try to express address of an object with computation based
3987          on address of a different object.  This may cause problems in rtl
3988          level alias analysis (that does not expect this to be happening,
3989          as this is illegal in C), and would be unlikely to be useful
3990          anyway.  */
3991       if (use->iv->base_object
3992           && cand->iv->base_object
3993           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3994         return infinite_cost;
3995     }
3996
3997   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3998     {
3999       /* TODO -- add direct handling of this case.  */
4000       goto fallback;
4001     }
4002
4003   /* CSTEPI is removed from the offset in case statement is after the
4004      increment.  If the step is not constant, we use zero instead.
4005      This is a bit imprecise (there is the extra addition), but
4006      redundancy elimination is likely to transform the code so that
4007      it uses value of the variable before increment anyway,
4008      so it is not that much unrealistic.  */
4009   if (cst_and_fits_in_hwi (cstep))
4010     cstepi = int_cst_value (cstep);
4011   else
4012     cstepi = 0;
4013
4014   if (!constant_multiple_of (ustep, cstep, &rat))
4015     return infinite_cost;
4016
4017   if (double_int_fits_in_shwi_p (rat))
4018     ratio = double_int_to_shwi (rat);
4019   else
4020     return infinite_cost;
4021
4022   STRIP_NOPS (cbase);
4023   ctype = TREE_TYPE (cbase);
4024
4025   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4026      or ratio == 1, it is better to handle this like
4027
4028      ubase - ratio * cbase + ratio * var
4029
4030      (also holds in the case ratio == -1, TODO.  */
4031
4032   if (cst_and_fits_in_hwi (cbase))
4033     {
4034       offset = - ratio * int_cst_value (cbase);
4035       cost = difference_cost (data,
4036                               ubase, build_int_cst (utype, 0),
4037                               &symbol_present, &var_present, &offset,
4038                               depends_on);
4039       cost.cost /= avg_loop_niter (data->current_loop);
4040     }
4041   else if (ratio == 1)
4042     {
4043       cost = difference_cost (data,
4044                               ubase, cbase,
4045                               &symbol_present, &var_present, &offset,
4046                               depends_on);
4047       cost.cost /= avg_loop_niter (data->current_loop);
4048     }
4049   else if (address_p
4050            && !POINTER_TYPE_P (ctype)
4051            && multiplier_allowed_in_address_p
4052                 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4053                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4054     {
4055       cbase
4056         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4057       cost = difference_cost (data,
4058                               ubase, cbase,
4059                               &symbol_present, &var_present, &offset,
4060                               depends_on);
4061       cost.cost /= avg_loop_niter (data->current_loop);
4062     }
4063   else
4064     {
4065       cost = force_var_cost (data, cbase, depends_on);
4066       cost = add_costs (cost,
4067                         difference_cost (data,
4068                                          ubase, build_int_cst (utype, 0),
4069                                          &symbol_present, &var_present,
4070                                          &offset, depends_on));
4071       cost.cost /= avg_loop_niter (data->current_loop);
4072       cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4073     }
4074
4075   if (inv_expr_id)
4076     {
4077       *inv_expr_id =
4078           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4079       /* Clear depends on.  */
4080       if (*inv_expr_id != -1 && depends_on && *depends_on)
4081         bitmap_clear (*depends_on);
4082     }
4083
4084   /* If we are after the increment, the value of the candidate is higher by
4085      one iteration.  */
4086   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4087   if (stmt_is_after_inc)
4088     offset -= ratio * cstepi;
4089
4090   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4091      (symbol/var1/const parts may be omitted).  If we are looking for an
4092      address, find the cost of addressing this.  */
4093   if (address_p)
4094     return add_costs (cost,
4095                       get_address_cost (symbol_present, var_present,
4096                                         offset, ratio, cstepi,
4097                                         TYPE_MODE (TREE_TYPE (utype)),
4098                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4099                                         speed, stmt_is_after_inc,
4100                                         can_autoinc));
4101
4102   /* Otherwise estimate the costs for computing the expression.  */
4103   if (!symbol_present && !var_present && !offset)
4104     {
4105       if (ratio != 1)
4106         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4107       return cost;
4108     }
4109
4110   /* Symbol + offset should be compile-time computable so consider that they
4111       are added once to the variable, if present.  */
4112   if (var_present && (symbol_present || offset))
4113     cost.cost += adjust_setup_cost (data,
4114                                     add_cost (TYPE_MODE (ctype), speed));
4115
4116   /* Having offset does not affect runtime cost in case it is added to
4117      symbol, but it increases complexity.  */
4118   if (offset)
4119     cost.complexity++;
4120
4121   cost.cost += add_cost (TYPE_MODE (ctype), speed);
4122
4123   aratio = ratio > 0 ? ratio : -ratio;
4124   if (aratio != 1)
4125     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4126   return cost;
4127
4128 fallback:
4129   if (can_autoinc)
4130     *can_autoinc = false;
4131
4132   {
4133     /* Just get the expression, expand it and measure the cost.  */
4134     tree comp = get_computation_at (data->current_loop, use, cand, at);
4135
4136     if (!comp)
4137       return infinite_cost;
4138
4139     if (address_p)
4140       comp = build_simple_mem_ref (comp);
4141
4142     return new_cost (computation_cost (comp, speed), 0);
4143   }
4144 }
4145
4146 /* Determines the cost of the computation by that USE is expressed
4147    from induction variable CAND.  If ADDRESS_P is true, we just need
4148    to create an address from it, otherwise we want to get it into
4149    register.  A set of invariants we depend on is stored in
4150    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4151    autoinc addressing is likely.  */
4152
4153 static comp_cost
4154 get_computation_cost (struct ivopts_data *data,
4155                       struct iv_use *use, struct iv_cand *cand,
4156                       bool address_p, bitmap *depends_on,
4157                       bool *can_autoinc, int *inv_expr_id)
4158 {
4159   return get_computation_cost_at (data,
4160                                   use, cand, address_p, depends_on, use->stmt,
4161                                   can_autoinc, inv_expr_id);
4162 }
4163
4164 /* Determines cost of basing replacement of USE on CAND in a generic
4165    expression.  */
4166
4167 static bool
4168 determine_use_iv_cost_generic (struct ivopts_data *data,
4169                                struct iv_use *use, struct iv_cand *cand)
4170 {
4171   bitmap depends_on;
4172   comp_cost cost;
4173   int inv_expr_id = -1;
4174
4175   /* The simple case first -- if we need to express value of the preserved
4176      original biv, the cost is 0.  This also prevents us from counting the
4177      cost of increment twice -- once at this use and once in the cost of
4178      the candidate.  */
4179   if (cand->pos == IP_ORIGINAL
4180       && cand->incremented_at == use->stmt)
4181     {
4182       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4183       return true;
4184     }
4185
4186   cost = get_computation_cost (data, use, cand, false, &depends_on,
4187                                NULL, &inv_expr_id);
4188
4189   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4190                    inv_expr_id);
4191
4192   return !infinite_cost_p (cost);
4193 }
4194
4195 /* Determines cost of basing replacement of USE on CAND in an address.  */
4196
4197 static bool
4198 determine_use_iv_cost_address (struct ivopts_data *data,
4199                                struct iv_use *use, struct iv_cand *cand)
4200 {
4201   bitmap depends_on;
4202   bool can_autoinc;
4203   int inv_expr_id = -1;
4204   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4205                                          &can_autoinc, &inv_expr_id);
4206
4207   if (cand->ainc_use == use)
4208     {
4209       if (can_autoinc)
4210         cost.cost -= cand->cost_step;
4211       /* If we generated the candidate solely for exploiting autoincrement
4212          opportunities, and it turns out it can't be used, set the cost to
4213          infinity to make sure we ignore it.  */
4214       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4215         cost = infinite_cost;
4216     }
4217   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4218                    inv_expr_id);
4219
4220   return !infinite_cost_p (cost);
4221 }
4222
4223 /* Computes value of candidate CAND at position AT in iteration NITER, and
4224    stores it to VAL.  */
4225
4226 static void
4227 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4228                aff_tree *val)
4229 {
4230   aff_tree step, delta, nit;
4231   struct iv *iv = cand->iv;
4232   tree type = TREE_TYPE (iv->base);
4233   tree steptype = type;
4234   if (POINTER_TYPE_P (type))
4235     steptype = sizetype;
4236
4237   tree_to_aff_combination (iv->step, steptype, &step);
4238   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4239   aff_combination_convert (&nit, steptype);
4240   aff_combination_mult (&nit, &step, &delta);
4241   if (stmt_after_increment (loop, cand, at))
4242     aff_combination_add (&delta, &step);
4243
4244   tree_to_aff_combination (iv->base, type, val);
4245   aff_combination_add (val, &delta);
4246 }
4247
4248 /* Returns period of induction variable iv.  */
4249
4250 static tree
4251 iv_period (struct iv *iv)
4252 {
4253   tree step = iv->step, period, type;
4254   tree pow2div;
4255
4256   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4257
4258   type = unsigned_type_for (TREE_TYPE (step));
4259   /* Period of the iv is lcm (step, type_range)/step -1,
4260      i.e., N*type_range/step - 1. Since type range is power
4261      of two, N == (step >> num_of_ending_zeros_binary (step),
4262      so the final result is
4263
4264        (type_range >> num_of_ending_zeros_binary (step)) - 1
4265
4266   */
4267   pow2div = num_ending_zeros (step);
4268
4269   period = build_low_bits_mask (type,
4270                                 (TYPE_PRECISION (type)
4271                                  - tree_low_cst (pow2div, 1)));
4272
4273   return period;
4274 }
4275
4276 /* Returns the comparison operator used when eliminating the iv USE.  */
4277
4278 static enum tree_code
4279 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4280 {
4281   struct loop *loop = data->current_loop;
4282   basic_block ex_bb;
4283   edge exit;
4284
4285   ex_bb = gimple_bb (use->stmt);
4286   exit = EDGE_SUCC (ex_bb, 0);
4287   if (flow_bb_inside_loop_p (loop, exit->dest))
4288     exit = EDGE_SUCC (ex_bb, 1);
4289
4290   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4291 }
4292
4293 /* Check whether it is possible to express the condition in USE by comparison
4294    of candidate CAND.  If so, store the value compared with to BOUND.  */
4295
4296 static bool
4297 may_eliminate_iv (struct ivopts_data *data,
4298                   struct iv_use *use, struct iv_cand *cand, tree *bound)
4299 {
4300   basic_block ex_bb;
4301   edge exit;
4302   tree nit, period;
4303   struct loop *loop = data->current_loop;
4304   aff_tree bnd;
4305   struct tree_niter_desc *desc = NULL;
4306
4307   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4308     return false;
4309
4310   /* For now works only for exits that dominate the loop latch.
4311      TODO: extend to other conditions inside loop body.  */
4312   ex_bb = gimple_bb (use->stmt);
4313   if (use->stmt != last_stmt (ex_bb)
4314       || gimple_code (use->stmt) != GIMPLE_COND
4315       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4316     return false;
4317
4318   exit = EDGE_SUCC (ex_bb, 0);
4319   if (flow_bb_inside_loop_p (loop, exit->dest))
4320     exit = EDGE_SUCC (ex_bb, 1);
4321   if (flow_bb_inside_loop_p (loop, exit->dest))
4322     return false;
4323
4324   nit = niter_for_exit (data, exit, &desc);
4325   if (!nit)
4326     return false;
4327
4328   /* Determine whether we can use the variable to test the exit condition.
4329      This is the case iff the period of the induction variable is greater
4330      than the number of iterations for which the exit condition is true.  */
4331   period = iv_period (cand->iv);
4332
4333   /* If the number of iterations is constant, compare against it directly.  */
4334   if (TREE_CODE (nit) == INTEGER_CST)
4335     {
4336       /* See cand_value_at.  */
4337       if (stmt_after_increment (loop, cand, use->stmt))
4338         {
4339           if (!tree_int_cst_lt (nit, period))
4340             return false;
4341         }
4342       else
4343         {
4344           if (tree_int_cst_lt (period, nit))
4345             return false;
4346         }
4347     }
4348
4349   /* If not, and if this is the only possible exit of the loop, see whether
4350      we can get a conservative estimate on the number of iterations of the
4351      entire loop and compare against that instead.  */
4352   else
4353     {
4354       double_int period_value, max_niter;
4355
4356       max_niter = desc->max;
4357       if (stmt_after_increment (loop, cand, use->stmt))
4358         max_niter = double_int_add (max_niter, double_int_one);
4359       period_value = tree_to_double_int (period);
4360       if (double_int_ucmp (max_niter, period_value) > 0)
4361         {
4362           /* See if we can take advantage of infered loop bound information.  */
4363           if (loop_only_exit_p (loop, exit))
4364             {
4365               if (!estimated_loop_iterations (loop, true, &max_niter))
4366                 return false;
4367               /* The loop bound is already adjusted by adding 1.  */
4368               if (double_int_ucmp (max_niter, period_value) > 0)
4369                 return false;
4370             }
4371           else
4372             return false;
4373         }
4374     }
4375
4376   cand_value_at (loop, cand, use->stmt, nit, &bnd);
4377
4378   *bound = aff_combination_to_tree (&bnd);
4379   /* It is unlikely that computing the number of iterations using division
4380      would be more profitable than keeping the original induction variable.  */
4381   if (expression_expensive_p (*bound))
4382     return false;
4383   return true;
4384 }
4385
4386
4387 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4388
4389 static bool
4390 determine_use_iv_cost_condition (struct ivopts_data *data,
4391                                  struct iv_use *use, struct iv_cand *cand)
4392 {
4393   tree bound = NULL_TREE;
4394   struct iv *cmp_iv;
4395   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4396   comp_cost elim_cost, express_cost, cost;
4397   bool ok;
4398   int inv_expr_id = -1;
4399   tree *control_var, *bound_cst;
4400
4401   /* Only consider real candidates.  */
4402   if (!cand->iv)
4403     {
4404       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4405       return false;
4406     }
4407
4408   /* Try iv elimination.  */
4409   if (may_eliminate_iv (data, use, cand, &bound))
4410     {
4411       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4412       /* The bound is a loop invariant, so it will be only computed
4413          once.  */
4414       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4415     }
4416   else
4417     elim_cost = infinite_cost;
4418
4419   /* Try expressing the original giv.  If it is compared with an invariant,
4420      note that we cannot get rid of it.  */
4421   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4422                               NULL, &cmp_iv);
4423   gcc_assert (ok);
4424
4425   /* When the condition is a comparison of the candidate IV against
4426      zero, prefer this IV.
4427
4428      TODO: The constant that we're substracting from the cost should
4429      be target-dependent.  This information should be added to the
4430      target costs for each backend.  */
4431   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4432       && integer_zerop (*bound_cst)
4433       && (operand_equal_p (*control_var, cand->var_after, 0)
4434           || operand_equal_p (*control_var, cand->var_before, 0)))
4435     elim_cost.cost -= 1;
4436
4437   express_cost = get_computation_cost (data, use, cand, false,
4438                                        &depends_on_express, NULL,
4439                                        &inv_expr_id);
4440   fd_ivopts_data = data;
4441   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4442
4443   /* Choose the better approach, preferring the eliminated IV. */
4444   if (compare_costs (elim_cost, express_cost) <= 0)
4445     {
4446       cost = elim_cost;
4447       depends_on = depends_on_elim;
4448       depends_on_elim = NULL;
4449     }
4450   else
4451     {
4452       cost = express_cost;
4453       depends_on = depends_on_express;
4454       depends_on_express = NULL;
4455       bound = NULL_TREE;
4456     }
4457
4458   set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4459
4460   if (depends_on_elim)
4461     BITMAP_FREE (depends_on_elim);
4462   if (depends_on_express)
4463     BITMAP_FREE (depends_on_express);
4464
4465   return !infinite_cost_p (cost);
4466 }
4467
4468 /* Determines cost of basing replacement of USE on CAND.  Returns false
4469    if USE cannot be based on CAND.  */
4470
4471 static bool
4472 determine_use_iv_cost (struct ivopts_data *data,
4473                        struct iv_use *use, struct iv_cand *cand)
4474 {
4475   switch (use->type)
4476     {
4477     case USE_NONLINEAR_EXPR:
4478       return determine_use_iv_cost_generic (data, use, cand);
4479
4480     case USE_ADDRESS:
4481       return determine_use_iv_cost_address (data, use, cand);
4482
4483     case USE_COMPARE:
4484       return determine_use_iv_cost_condition (data, use, cand);
4485
4486     default:
4487       gcc_unreachable ();
4488     }
4489 }
4490
4491 /* Return true if get_computation_cost indicates that autoincrement is
4492    a possibility for the pair of USE and CAND, false otherwise.  */
4493
4494 static bool
4495 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4496                            struct iv_cand *cand)
4497 {
4498   bitmap depends_on;
4499   bool can_autoinc;
4500   comp_cost cost;
4501
4502   if (use->type != USE_ADDRESS)
4503     return false;
4504
4505   cost = get_computation_cost (data, use, cand, true, &depends_on,
4506                                &can_autoinc, NULL);
4507
4508   BITMAP_FREE (depends_on);
4509
4510   return !infinite_cost_p (cost) && can_autoinc;
4511 }
4512
4513 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4514    use that allows autoincrement, and set their AINC_USE if possible.  */
4515
4516 static void
4517 set_autoinc_for_original_candidates (struct ivopts_data *data)
4518 {
4519   unsigned i, j;
4520
4521   for (i = 0; i < n_iv_cands (data); i++)
4522     {
4523       struct iv_cand *cand = iv_cand (data, i);
4524       struct iv_use *closest = NULL;
4525       if (cand->pos != IP_ORIGINAL)
4526         continue;
4527       for (j = 0; j < n_iv_uses (data); j++)
4528         {
4529           struct iv_use *use = iv_use (data, j);
4530           unsigned uid = gimple_uid (use->stmt);
4531           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4532               || uid > gimple_uid (cand->incremented_at))
4533             continue;
4534           if (closest == NULL || uid > gimple_uid (closest->stmt))
4535             closest = use;
4536         }
4537       if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4538         continue;
4539       cand->ainc_use = closest;
4540     }
4541 }
4542
4543 /* Finds the candidates for the induction variables.  */
4544
4545 static void
4546 find_iv_candidates (struct ivopts_data *data)
4547 {
4548   /* Add commonly used ivs.  */
4549   add_standard_iv_candidates (data);
4550
4551   /* Add old induction variables.  */
4552   add_old_ivs_candidates (data);
4553
4554   /* Add induction variables derived from uses.  */
4555   add_derived_ivs_candidates (data);
4556
4557   set_autoinc_for_original_candidates (data);
4558
4559   /* Record the important candidates.  */
4560   record_important_candidates (data);
4561 }
4562
4563 /* Determines costs of basing the use of the iv on an iv candidate.  */
4564
4565 static void
4566 determine_use_iv_costs (struct ivopts_data *data)
4567 {
4568   unsigned i, j;
4569   struct iv_use *use;
4570   struct iv_cand *cand;
4571   bitmap to_clear = BITMAP_ALLOC (NULL);
4572
4573   alloc_use_cost_map (data);
4574
4575   for (i = 0; i < n_iv_uses (data); i++)
4576     {
4577       use = iv_use (data, i);
4578
4579       if (data->consider_all_candidates)
4580         {
4581           for (j = 0; j < n_iv_cands (data); j++)
4582             {
4583               cand = iv_cand (data, j);
4584               determine_use_iv_cost (data, use, cand);
4585             }
4586         }
4587       else
4588         {
4589           bitmap_iterator bi;
4590
4591           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4592             {
4593               cand = iv_cand (data, j);
4594               if (!determine_use_iv_cost (data, use, cand))
4595                 bitmap_set_bit (to_clear, j);
4596             }
4597
4598           /* Remove the candidates for that the cost is infinite from
4599              the list of related candidates.  */
4600           bitmap_and_compl_into (use->related_cands, to_clear);
4601           bitmap_clear (to_clear);
4602         }
4603     }
4604
4605   BITMAP_FREE (to_clear);
4606
4607   if (dump_file && (dump_flags & TDF_DETAILS))
4608     {
4609       fprintf (dump_file, "Use-candidate costs:\n");
4610
4611       for (i = 0; i < n_iv_uses (data); i++)
4612         {
4613           use = iv_use (data, i);
4614
4615           fprintf (dump_file, "Use %d:\n", i);
4616           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
4617           for (j = 0; j < use->n_map_members; j++)
4618             {
4619               if (!use->cost_map[j].cand
4620                   || infinite_cost_p (use->cost_map[j].cost))
4621                 continue;
4622
4623               fprintf (dump_file, "  %d\t%d\t%d\t",
4624                        use->cost_map[j].cand->id,
4625                        use->cost_map[j].cost.cost,
4626                        use->cost_map[j].cost.complexity);
4627               if (use->cost_map[j].depends_on)
4628                 bitmap_print (dump_file,
4629                               use->cost_map[j].depends_on, "","");
4630               if (use->cost_map[j].inv_expr_id != -1)
4631                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4632               fprintf (dump_file, "\n");
4633             }
4634
4635           fprintf (dump_file, "\n");
4636         }
4637       fprintf (dump_file, "\n");
4638     }
4639 }
4640
4641 /* Determines cost of the candidate CAND.  */
4642
4643 static void
4644 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4645 {
4646   comp_cost cost_base;
4647   unsigned cost, cost_step;
4648   tree base;
4649
4650   if (!cand->iv)
4651     {
4652       cand->cost = 0;
4653       return;
4654     }
4655
4656   /* There are two costs associated with the candidate -- its increment
4657      and its initialization.  The second is almost negligible for any loop
4658      that rolls enough, so we take it just very little into account.  */
4659
4660   base = cand->iv->base;
4661   cost_base = force_var_cost (data, base, NULL);
4662   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4663
4664   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4665
4666   /* Prefer the original ivs unless we may gain something by replacing it.
4667      The reason is to make debugging simpler; so this is not relevant for
4668      artificial ivs created by other optimization passes.  */
4669   if (cand->pos != IP_ORIGINAL
4670       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4671     cost++;
4672
4673   /* Prefer not to insert statements into latch unless there are some
4674      already (so that we do not create unnecessary jumps).  */
4675   if (cand->pos == IP_END
4676       && empty_block_p (ip_end_pos (data->current_loop)))
4677     cost++;
4678
4679   cand->cost = cost;
4680   cand->cost_step = cost_step;
4681 }
4682
4683 /* Determines costs of computation of the candidates.  */
4684
4685 static void
4686 determine_iv_costs (struct ivopts_data *data)
4687 {
4688   unsigned i;
4689
4690   if (dump_file && (dump_flags & TDF_DETAILS))
4691     {
4692       fprintf (dump_file, "Candidate costs:\n");
4693       fprintf (dump_file, "  cand\tcost\n");
4694     }
4695
4696   for (i = 0; i < n_iv_cands (data); i++)
4697     {
4698       struct iv_cand *cand = iv_cand (data, i);
4699
4700       determine_iv_cost (data, cand);
4701
4702       if (dump_file && (dump_flags & TDF_DETAILS))
4703         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4704     }
4705
4706   if (dump_file && (dump_flags & TDF_DETAILS))
4707     fprintf (dump_file, "\n");
4708 }
4709
4710 /* Calculates cost for having SIZE induction variables.  */
4711
4712 static unsigned
4713 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4714 {
4715   /* We add size to the cost, so that we prefer eliminating ivs
4716      if possible.  */
4717   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4718                                             data->body_includes_call);
4719 }
4720
4721 /* For each size of the induction variable set determine the penalty.  */
4722
4723 static void
4724 determine_set_costs (struct ivopts_data *data)
4725 {
4726   unsigned j, n;
4727   gimple phi;
4728   gimple_stmt_iterator psi;
4729   tree op;
4730   struct loop *loop = data->current_loop;
4731   bitmap_iterator bi;
4732
4733   if (dump_file && (dump_flags & TDF_DETAILS))
4734     {
4735       fprintf (dump_file, "Global costs:\n");
4736       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4737       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
4738       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
4739       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
4740     }
4741
4742   n = 0;
4743   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4744     {
4745       phi = gsi_stmt (psi);
4746       op = PHI_RESULT (phi);
4747
4748       if (!is_gimple_reg (op))
4749         continue;
4750
4751       if (get_iv (data, op))
4752         continue;
4753
4754       n++;
4755     }
4756
4757   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4758     {
4759       struct version_info *info = ver_info (data, j);
4760
4761       if (info->inv_id && info->has_nonlin_use)
4762         n++;
4763     }
4764
4765   data->regs_used = n;
4766   if (dump_file && (dump_flags & TDF_DETAILS))
4767     fprintf (dump_file, "  regs_used %d\n", n);
4768
4769   if (dump_file && (dump_flags & TDF_DETAILS))
4770     {
4771       fprintf (dump_file, "  cost for size:\n");
4772       fprintf (dump_file, "  ivs\tcost\n");
4773       for (j = 0; j <= 2 * target_avail_regs; j++)
4774         fprintf (dump_file, "  %d\t%d\n", j,
4775                  ivopts_global_cost_for_size (data, j));
4776       fprintf (dump_file, "\n");
4777     }
4778 }
4779
4780 /* Returns true if A is a cheaper cost pair than B.  */
4781
4782 static bool
4783 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4784 {
4785   int cmp;
4786
4787   if (!a)
4788     return false;
4789
4790   if (!b)
4791     return true;
4792
4793   cmp = compare_costs (a->cost, b->cost);
4794   if (cmp < 0)
4795     return true;
4796
4797   if (cmp > 0)
4798     return false;
4799
4800   /* In case the costs are the same, prefer the cheaper candidate.  */
4801   if (a->cand->cost < b->cand->cost)
4802     return true;
4803
4804   return false;
4805 }
4806
4807
4808 /* Returns candidate by that USE is expressed in IVS.  */
4809
4810 static struct cost_pair *
4811 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4812 {
4813   return ivs->cand_for_use[use->id];
4814 }
4815
4816 /* Computes the cost field of IVS structure.  */
4817
4818 static void
4819 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4820 {
4821   comp_cost cost = ivs->cand_use_cost;
4822
4823   cost.cost += ivs->cand_cost;
4824
4825   cost.cost += ivopts_global_cost_for_size (data,
4826                                             ivs->n_regs + ivs->num_used_inv_expr);
4827
4828   ivs->cost = cost;
4829 }
4830
4831 /* Remove invariants in set INVS to set IVS.  */
4832
4833 static void
4834 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4835 {
4836   bitmap_iterator bi;
4837   unsigned iid;
4838
4839   if (!invs)
4840     return;
4841
4842   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4843     {
4844       ivs->n_invariant_uses[iid]--;
4845       if (ivs->n_invariant_uses[iid] == 0)
4846         ivs->n_regs--;
4847     }
4848 }
4849
4850 /* Set USE not to be expressed by any candidate in IVS.  */
4851
4852 static void
4853 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4854                  struct iv_use *use)
4855 {
4856   unsigned uid = use->id, cid;
4857   struct cost_pair *cp;
4858
4859   cp = ivs->cand_for_use[uid];
4860   if (!cp)
4861     return;
4862   cid = cp->cand->id;
4863
4864   ivs->bad_uses++;
4865   ivs->cand_for_use[uid] = NULL;
4866   ivs->n_cand_uses[cid]--;
4867
4868   if (ivs->n_cand_uses[cid] == 0)
4869     {
4870       bitmap_clear_bit (ivs->cands, cid);
4871       /* Do not count the pseudocandidates.  */
4872       if (cp->cand->iv)
4873         ivs->n_regs--;
4874       ivs->n_cands--;
4875       ivs->cand_cost -= cp->cand->cost;
4876
4877       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4878     }
4879
4880   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4881
4882   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4883
4884   if (cp->inv_expr_id != -1)
4885     {
4886       ivs->used_inv_expr[cp->inv_expr_id]--;
4887       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
4888         ivs->num_used_inv_expr--;
4889     }
4890   iv_ca_recount_cost (data, ivs);
4891 }
4892
4893 /* Add invariants in set INVS to set IVS.  */
4894
4895 static void
4896 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4897 {
4898   bitmap_iterator bi;
4899   unsigned iid;
4900
4901   if (!invs)
4902     return;
4903
4904   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4905     {
4906       ivs->n_invariant_uses[iid]++;
4907       if (ivs->n_invariant_uses[iid] == 1)
4908         ivs->n_regs++;
4909     }
4910 }
4911
4912 /* Set cost pair for USE in set IVS to CP.  */
4913
4914 static void
4915 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4916               struct iv_use *use, struct cost_pair *cp)
4917 {
4918   unsigned uid = use->id, cid;
4919
4920   if (ivs->cand_for_use[uid] == cp)
4921     return;
4922
4923   if (ivs->cand_for_use[uid])
4924     iv_ca_set_no_cp (data, ivs, use);
4925
4926   if (cp)
4927     {
4928       cid = cp->cand->id;
4929
4930       ivs->bad_uses--;
4931       ivs->cand_for_use[uid] = cp;
4932       ivs->n_cand_uses[cid]++;
4933       if (ivs->n_cand_uses[cid] == 1)
4934         {
4935           bitmap_set_bit (ivs->cands, cid);
4936           /* Do not count the pseudocandidates.  */
4937           if (cp->cand->iv)
4938             ivs->n_regs++;
4939           ivs->n_cands++;
4940           ivs->cand_cost += cp->cand->cost;
4941
4942           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4943         }
4944
4945       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4946       iv_ca_set_add_invariants (ivs, cp->depends_on);
4947
4948       if (cp->inv_expr_id != -1)
4949         {
4950           ivs->used_inv_expr[cp->inv_expr_id]++;
4951           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
4952             ivs->num_used_inv_expr++;
4953         }
4954       iv_ca_recount_cost (data, ivs);
4955     }
4956 }
4957
4958 /* Extend set IVS by expressing USE by some of the candidates in it
4959    if possible. All important candidates will be considered
4960    if IMPORTANT_CANDIDATES is true.  */
4961
4962 static void
4963 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4964                struct iv_use *use, bool important_candidates)
4965 {
4966   struct cost_pair *best_cp = NULL, *cp;
4967   bitmap_iterator bi;
4968   bitmap cands;
4969   unsigned i;
4970
4971   gcc_assert (ivs->upto >= use->id);
4972
4973   if (ivs->upto == use->id)
4974     {
4975       ivs->upto++;
4976       ivs->bad_uses++;
4977     }
4978
4979   cands = (important_candidates ? data->important_candidates : ivs->cands);
4980   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
4981     {
4982       struct iv_cand *cand = iv_cand (data, i);
4983
4984       cp = get_use_iv_cost (data, use, cand);
4985
4986       if (cheaper_cost_pair (cp, best_cp))
4987         best_cp = cp;
4988     }
4989
4990   iv_ca_set_cp (data, ivs, use, best_cp);
4991 }
4992
4993 /* Get cost for assignment IVS.  */
4994
4995 static comp_cost
4996 iv_ca_cost (struct iv_ca *ivs)
4997 {
4998   /* This was a conditional expression but it triggered a bug in
4999      Sun C 5.5.  */
5000   if (ivs->bad_uses)
5001     return infinite_cost;
5002   else
5003     return ivs->cost;
5004 }
5005
5006 /* Returns true if all dependences of CP are among invariants in IVS.  */
5007
5008 static bool
5009 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5010 {
5011   unsigned i;
5012   bitmap_iterator bi;
5013
5014   if (!cp->depends_on)
5015     return true;
5016
5017   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5018     {
5019       if (ivs->n_invariant_uses[i] == 0)
5020         return false;
5021     }
5022
5023   return true;
5024 }
5025
5026 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5027    it before NEXT_CHANGE.  */
5028
5029 static struct iv_ca_delta *
5030 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5031                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5032 {
5033   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5034
5035   change->use = use;
5036   change->old_cp = old_cp;
5037   change->new_cp = new_cp;
5038   change->next_change = next_change;
5039
5040   return change;
5041 }
5042
5043 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5044    are rewritten.  */
5045
5046 static struct iv_ca_delta *
5047 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5048 {
5049   struct iv_ca_delta *last;
5050
5051   if (!l2)
5052     return l1;
5053
5054   if (!l1)
5055     return l2;
5056
5057   for (last = l1; last->next_change; last = last->next_change)
5058     continue;
5059   last->next_change = l2;
5060
5061   return l1;
5062 }
5063
5064 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5065
5066 static struct iv_ca_delta *
5067 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5068 {
5069   struct iv_ca_delta *act, *next, *prev = NULL;
5070   struct cost_pair *tmp;
5071
5072   for (act = delta; act; act = next)
5073     {
5074       next = act->next_change;
5075       act->next_change = prev;
5076       prev = act;
5077
5078       tmp = act->old_cp;
5079       act->old_cp = act->new_cp;
5080       act->new_cp = tmp;
5081     }
5082
5083   return prev;
5084 }
5085
5086 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5087    reverted instead.  */
5088
5089 static void
5090 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5091                     struct iv_ca_delta *delta, bool forward)
5092 {
5093   struct cost_pair *from, *to;
5094   struct iv_ca_delta *act;
5095
5096   if (!forward)
5097     delta = iv_ca_delta_reverse (delta);
5098
5099   for (act = delta; act; act = act->next_change)
5100     {
5101       from = act->old_cp;
5102       to = act->new_cp;
5103       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5104       iv_ca_set_cp (data, ivs, act->use, to);
5105     }
5106
5107   if (!forward)
5108     iv_ca_delta_reverse (delta);
5109 }
5110
5111 /* Returns true if CAND is used in IVS.  */
5112
5113 static bool
5114 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5115 {
5116   return ivs->n_cand_uses[cand->id] > 0;
5117 }
5118
5119 /* Returns number of induction variable candidates in the set IVS.  */
5120
5121 static unsigned
5122 iv_ca_n_cands (struct iv_ca *ivs)
5123 {
5124   return ivs->n_cands;
5125 }
5126
5127 /* Free the list of changes DELTA.  */
5128
5129 static void
5130 iv_ca_delta_free (struct iv_ca_delta **delta)
5131 {
5132   struct iv_ca_delta *act, *next;
5133
5134   for (act = *delta; act; act = next)
5135     {
5136       next = act->next_change;
5137       free (act);
5138     }
5139
5140   *delta = NULL;
5141 }
5142
5143 /* Allocates new iv candidates assignment.  */
5144
5145 static struct iv_ca *
5146 iv_ca_new (struct ivopts_data *data)
5147 {
5148   struct iv_ca *nw = XNEW (struct iv_ca);
5149
5150   nw->upto = 0;
5151   nw->bad_uses = 0;
5152   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5153   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5154   nw->cands = BITMAP_ALLOC (NULL);
5155   nw->n_cands = 0;
5156   nw->n_regs = 0;
5157   nw->cand_use_cost = zero_cost;
5158   nw->cand_cost = 0;
5159   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5160   nw->cost = zero_cost;
5161   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5162   nw->num_used_inv_expr = 0;
5163
5164   return nw;
5165 }
5166
5167 /* Free memory occupied by the set IVS.  */
5168
5169 static void
5170 iv_ca_free (struct iv_ca **ivs)
5171 {
5172   free ((*ivs)->cand_for_use);
5173   free ((*ivs)->n_cand_uses);
5174   BITMAP_FREE ((*ivs)->cands);
5175   free ((*ivs)->n_invariant_uses);
5176   free ((*ivs)->used_inv_expr);
5177   free (*ivs);
5178   *ivs = NULL;
5179 }
5180
5181 /* Dumps IVS to FILE.  */
5182
5183 static void
5184 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5185 {
5186   const char *pref = "  invariants ";
5187   unsigned i;
5188   comp_cost cost = iv_ca_cost (ivs);
5189
5190   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5191   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5192            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5193   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5194
5195    for (i = 0; i < ivs->upto; i++)
5196     {
5197       struct iv_use *use = iv_use (data, i);
5198       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5199       if (cp)
5200         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5201                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5202       else
5203         fprintf (file, "   use:%d --> ??\n", use->id);
5204     }
5205
5206   for (i = 1; i <= data->max_inv_id; i++)
5207     if (ivs->n_invariant_uses[i])
5208       {
5209         fprintf (file, "%s%d", pref, i);
5210         pref = ", ";
5211       }
5212   fprintf (file, "\n\n");
5213 }
5214
5215 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5216    new set, and store differences in DELTA.  Number of induction variables
5217    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5218    the function will try to find a solution with mimimal iv candidates.  */
5219
5220 static comp_cost
5221 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5222               struct iv_cand *cand, struct iv_ca_delta **delta,
5223               unsigned *n_ivs, bool min_ncand)
5224 {
5225   unsigned i;
5226   comp_cost cost;
5227   struct iv_use *use;
5228   struct cost_pair *old_cp, *new_cp;
5229
5230   *delta = NULL;
5231   for (i = 0; i < ivs->upto; i++)
5232     {
5233       use = iv_use (data, i);
5234       old_cp = iv_ca_cand_for_use (ivs, use);
5235
5236       if (old_cp
5237           && old_cp->cand == cand)
5238         continue;
5239
5240       new_cp = get_use_iv_cost (data, use, cand);
5241       if (!new_cp)
5242         continue;
5243
5244       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5245         continue;
5246
5247       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5248         continue;
5249
5250       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5251     }
5252
5253   iv_ca_delta_commit (data, ivs, *delta, true);
5254   cost = iv_ca_cost (ivs);
5255   if (n_ivs)
5256     *n_ivs = iv_ca_n_cands (ivs);
5257   iv_ca_delta_commit (data, ivs, *delta, false);
5258
5259   return cost;
5260 }
5261
5262 /* Try narrowing set IVS by removing CAND.  Return the cost of
5263    the new set and store the differences in DELTA.  */
5264
5265 static comp_cost
5266 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5267               struct iv_cand *cand, struct iv_ca_delta **delta)
5268 {
5269   unsigned i, ci;
5270   struct iv_use *use;
5271   struct cost_pair *old_cp, *new_cp, *cp;
5272   bitmap_iterator bi;
5273   struct iv_cand *cnd;
5274   comp_cost cost;
5275
5276   *delta = NULL;
5277   for (i = 0; i < n_iv_uses (data); i++)
5278     {
5279       use = iv_use (data, i);
5280
5281       old_cp = iv_ca_cand_for_use (ivs, use);
5282       if (old_cp->cand != cand)
5283         continue;
5284
5285       new_cp = NULL;
5286
5287       if (data->consider_all_candidates)
5288         {
5289           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5290             {
5291               if (ci == cand->id)
5292                 continue;
5293
5294               cnd = iv_cand (data, ci);
5295
5296               cp = get_use_iv_cost (data, use, cnd);
5297               if (!cp)
5298                 continue;
5299
5300               if (!iv_ca_has_deps (ivs, cp))
5301                 continue;
5302
5303               if (!cheaper_cost_pair (cp, new_cp))
5304                 continue;
5305
5306               new_cp = cp;
5307             }
5308         }
5309       else
5310         {
5311           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5312             {
5313               if (ci == cand->id)
5314                 continue;
5315
5316               cnd = iv_cand (data, ci);
5317
5318               cp = get_use_iv_cost (data, use, cnd);
5319               if (!cp)
5320                 continue;
5321               if (!iv_ca_has_deps (ivs, cp))
5322                 continue;
5323
5324               if (!cheaper_cost_pair (cp, new_cp))
5325                 continue;
5326
5327               new_cp = cp;
5328             }
5329         }
5330
5331       if (!new_cp)
5332         {
5333           iv_ca_delta_free (delta);
5334           return infinite_cost;
5335         }
5336
5337       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5338     }
5339
5340   iv_ca_delta_commit (data, ivs, *delta, true);
5341   cost = iv_ca_cost (ivs);
5342   iv_ca_delta_commit (data, ivs, *delta, false);
5343
5344   return cost;
5345 }
5346
5347 /* Try optimizing the set of candidates IVS by removing candidates different
5348    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5349    differences in DELTA.  */
5350
5351 static comp_cost
5352 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5353              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5354 {
5355   bitmap_iterator bi;
5356   struct iv_ca_delta *act_delta, *best_delta;
5357   unsigned i;
5358   comp_cost best_cost, acost;
5359   struct iv_cand *cand;
5360
5361   best_delta = NULL;
5362   best_cost = iv_ca_cost (ivs);
5363
5364   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5365     {
5366       cand = iv_cand (data, i);
5367
5368       if (cand == except_cand)
5369         continue;
5370
5371       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5372
5373       if (compare_costs (acost, best_cost) < 0)
5374         {
5375           best_cost = acost;
5376           iv_ca_delta_free (&best_delta);
5377           best_delta = act_delta;
5378         }
5379       else
5380         iv_ca_delta_free (&act_delta);
5381     }
5382
5383   if (!best_delta)
5384     {
5385       *delta = NULL;
5386       return best_cost;
5387     }
5388
5389   /* Recurse to possibly remove other unnecessary ivs.  */
5390   iv_ca_delta_commit (data, ivs, best_delta, true);
5391   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5392   iv_ca_delta_commit (data, ivs, best_delta, false);
5393   *delta = iv_ca_delta_join (best_delta, *delta);
5394   return best_cost;
5395 }
5396
5397 /* Tries to extend the sets IVS in the best possible way in order
5398    to express the USE.  If ORIGINALP is true, prefer candidates from
5399    the original set of IVs, otherwise favor important candidates not
5400    based on any memory object.  */
5401
5402 static bool
5403 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5404                   struct iv_use *use, bool originalp)
5405 {
5406   comp_cost best_cost, act_cost;
5407   unsigned i;
5408   bitmap_iterator bi;
5409   struct iv_cand *cand;
5410   struct iv_ca_delta *best_delta = NULL, *act_delta;
5411   struct cost_pair *cp;
5412
5413   iv_ca_add_use (data, ivs, use, false);
5414   best_cost = iv_ca_cost (ivs);
5415
5416   cp = iv_ca_cand_for_use (ivs, use);
5417   if (!cp)
5418     {
5419       ivs->upto--;
5420       ivs->bad_uses--;
5421       iv_ca_add_use (data, ivs, use, true);
5422       best_cost = iv_ca_cost (ivs);
5423       cp = iv_ca_cand_for_use (ivs, use);
5424     }
5425   if (cp)
5426     {
5427       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5428       iv_ca_set_no_cp (data, ivs, use);
5429     }
5430
5431   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5432      first try important candidates not based on any memory object.  Only if
5433      this fails, try the specific ones.  Rationale -- in loops with many
5434      variables the best choice often is to use just one generic biv.  If we
5435      added here many ivs specific to the uses, the optimization algorithm later
5436      would be likely to get stuck in a local minimum, thus causing us to create
5437      too many ivs.  The approach from few ivs to more seems more likely to be
5438      successful -- starting from few ivs, replacing an expensive use by a
5439      specific iv should always be a win.  */
5440   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5441     {
5442       cand = iv_cand (data, i);
5443
5444       if (originalp && cand->pos !=IP_ORIGINAL)
5445         continue;
5446
5447       if (!originalp && cand->iv->base_object != NULL_TREE)
5448         continue;
5449
5450       if (iv_ca_cand_used_p (ivs, cand))
5451         continue;
5452
5453       cp = get_use_iv_cost (data, use, cand);
5454       if (!cp)
5455         continue;
5456
5457       iv_ca_set_cp (data, ivs, use, cp);
5458       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5459                                true);
5460       iv_ca_set_no_cp (data, ivs, use);
5461       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5462
5463       if (compare_costs (act_cost, best_cost) < 0)
5464         {
5465           best_cost = act_cost;
5466
5467           iv_ca_delta_free (&best_delta);
5468           best_delta = act_delta;
5469         }
5470       else
5471         iv_ca_delta_free (&act_delta);
5472     }
5473
5474   if (infinite_cost_p (best_cost))
5475     {
5476       for (i = 0; i < use->n_map_members; i++)
5477         {
5478           cp = use->cost_map + i;
5479           cand = cp->cand;
5480           if (!cand)
5481             continue;
5482
5483           /* Already tried this.  */
5484           if (cand->important)
5485             {
5486               if (originalp && cand->pos == IP_ORIGINAL)
5487                 continue;
5488               if (!originalp && cand->iv->base_object == NULL_TREE)
5489                 continue;
5490             }
5491
5492           if (iv_ca_cand_used_p (ivs, cand))
5493             continue;
5494
5495           act_delta = NULL;
5496           iv_ca_set_cp (data, ivs, use, cp);
5497           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5498           iv_ca_set_no_cp (data, ivs, use);
5499           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5500                                        cp, act_delta);
5501
5502           if (compare_costs (act_cost, best_cost) < 0)
5503             {
5504               best_cost = act_cost;
5505
5506               if (best_delta)
5507                 iv_ca_delta_free (&best_delta);
5508               best_delta = act_delta;
5509             }
5510           else
5511             iv_ca_delta_free (&act_delta);
5512         }
5513     }
5514
5515   iv_ca_delta_commit (data, ivs, best_delta, true);
5516   iv_ca_delta_free (&best_delta);
5517
5518   return !infinite_cost_p (best_cost);
5519 }
5520
5521 /* Finds an initial assignment of candidates to uses.  */
5522
5523 static struct iv_ca *
5524 get_initial_solution (struct ivopts_data *data, bool originalp)
5525 {
5526   struct iv_ca *ivs = iv_ca_new (data);
5527   unsigned i;
5528
5529   for (i = 0; i < n_iv_uses (data); i++)
5530     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5531       {
5532         iv_ca_free (&ivs);
5533         return NULL;
5534       }
5535
5536   return ivs;
5537 }
5538
5539 /* Tries to improve set of induction variables IVS.  */
5540
5541 static bool
5542 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5543 {
5544   unsigned i, n_ivs;
5545   comp_cost acost, best_cost = iv_ca_cost (ivs);
5546   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5547   struct iv_cand *cand;
5548
5549   /* Try extending the set of induction variables by one.  */
5550   for (i = 0; i < n_iv_cands (data); i++)
5551     {
5552       cand = iv_cand (data, i);
5553
5554       if (iv_ca_cand_used_p (ivs, cand))
5555         continue;
5556
5557       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5558       if (!act_delta)
5559         continue;
5560
5561       /* If we successfully added the candidate and the set is small enough,
5562          try optimizing it by removing other candidates.  */
5563       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5564         {
5565           iv_ca_delta_commit (data, ivs, act_delta, true);
5566           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5567           iv_ca_delta_commit (data, ivs, act_delta, false);
5568           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5569         }
5570
5571       if (compare_costs (acost, best_cost) < 0)
5572         {
5573           best_cost = acost;
5574           iv_ca_delta_free (&best_delta);
5575           best_delta = act_delta;
5576         }
5577       else
5578         iv_ca_delta_free (&act_delta);
5579     }
5580
5581   if (!best_delta)
5582     {
5583       /* Try removing the candidates from the set instead.  */
5584       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5585
5586       /* Nothing more we can do.  */
5587       if (!best_delta)
5588         return false;
5589     }
5590
5591   iv_ca_delta_commit (data, ivs, best_delta, true);
5592   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5593   iv_ca_delta_free (&best_delta);
5594   return true;
5595 }
5596
5597 /* Attempts to find the optimal set of induction variables.  We do simple
5598    greedy heuristic -- we try to replace at most one candidate in the selected
5599    solution and remove the unused ivs while this improves the cost.  */
5600
5601 static struct iv_ca *
5602 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5603 {
5604   struct iv_ca *set;
5605
5606   /* Get the initial solution.  */
5607   set = get_initial_solution (data, originalp);
5608   if (!set)
5609     {
5610       if (dump_file && (dump_flags & TDF_DETAILS))
5611         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5612       return NULL;
5613     }
5614
5615   if (dump_file && (dump_flags & TDF_DETAILS))
5616     {
5617       fprintf (dump_file, "Initial set of candidates:\n");
5618       iv_ca_dump (data, dump_file, set);
5619     }
5620
5621   while (try_improve_iv_set (data, set))
5622     {
5623       if (dump_file && (dump_flags & TDF_DETAILS))
5624         {
5625           fprintf (dump_file, "Improved to:\n");
5626           iv_ca_dump (data, dump_file, set);
5627         }
5628     }
5629
5630   return set;
5631 }
5632
5633 static struct iv_ca *
5634 find_optimal_iv_set (struct ivopts_data *data)
5635 {
5636   unsigned i;
5637   struct iv_ca *set, *origset;
5638   struct iv_use *use;
5639   comp_cost cost, origcost;
5640
5641   /* Determine the cost based on a strategy that starts with original IVs,
5642      and try again using a strategy that prefers candidates not based
5643      on any IVs.  */
5644   origset = find_optimal_iv_set_1 (data, true);
5645   set = find_optimal_iv_set_1 (data, false);
5646
5647   if (!origset && !set)
5648     return NULL;
5649
5650   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5651   cost = set ? iv_ca_cost (set) : infinite_cost;
5652
5653   if (dump_file && (dump_flags & TDF_DETAILS))
5654     {
5655       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5656                origcost.cost, origcost.complexity);
5657       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5658                cost.cost, cost.complexity);
5659     }
5660
5661   /* Choose the one with the best cost.  */
5662   if (compare_costs (origcost, cost) <= 0)
5663     {
5664       if (set)
5665         iv_ca_free (&set);
5666       set = origset;
5667     }
5668   else if (origset)
5669     iv_ca_free (&origset);
5670
5671   for (i = 0; i < n_iv_uses (data); i++)
5672     {
5673       use = iv_use (data, i);
5674       use->selected = iv_ca_cand_for_use (set, use)->cand;
5675     }
5676
5677   return set;
5678 }
5679
5680 /* Creates a new induction variable corresponding to CAND.  */
5681
5682 static void
5683 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5684 {
5685   gimple_stmt_iterator incr_pos;
5686   tree base;
5687   bool after = false;
5688
5689   if (!cand->iv)
5690     return;
5691
5692   switch (cand->pos)
5693     {
5694     case IP_NORMAL:
5695       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5696       break;
5697
5698     case IP_END:
5699       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5700       after = true;
5701       break;
5702
5703     case IP_AFTER_USE:
5704       after = true;
5705       /* fall through */
5706     case IP_BEFORE_USE:
5707       incr_pos = gsi_for_stmt (cand->incremented_at);
5708       break;
5709
5710     case IP_ORIGINAL:
5711       /* Mark that the iv is preserved.  */
5712       name_info (data, cand->var_before)->preserve_biv = true;
5713       name_info (data, cand->var_after)->preserve_biv = true;
5714
5715       /* Rewrite the increment so that it uses var_before directly.  */
5716       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5717       return;
5718     }
5719
5720   gimple_add_tmp_var (cand->var_before);
5721   add_referenced_var (cand->var_before);
5722
5723   base = unshare_expr (cand->iv->base);
5724
5725   create_iv (base, unshare_expr (cand->iv->step),
5726              cand->var_before, data->current_loop,
5727              &incr_pos, after, &cand->var_before, &cand->var_after);
5728 }
5729
5730 /* Creates new induction variables described in SET.  */
5731
5732 static void
5733 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5734 {
5735   unsigned i;
5736   struct iv_cand *cand;
5737   bitmap_iterator bi;
5738
5739   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5740     {
5741       cand = iv_cand (data, i);
5742       create_new_iv (data, cand);
5743     }
5744
5745   if (dump_file && (dump_flags & TDF_DETAILS))
5746     {
5747       fprintf (dump_file, "\nSelected IV set: \n");
5748       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5749         {
5750           cand = iv_cand (data, i);
5751           dump_cand (dump_file, cand);
5752         }
5753       fprintf (dump_file, "\n");
5754     }
5755 }
5756
5757 /* Rewrites USE (definition of iv used in a nonlinear expression)
5758    using candidate CAND.  */
5759
5760 static void
5761 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5762                             struct iv_use *use, struct iv_cand *cand)
5763 {
5764   tree comp;
5765   tree op, tgt;
5766   gimple ass;
5767   gimple_stmt_iterator bsi;
5768
5769   /* An important special case -- if we are asked to express value of
5770      the original iv by itself, just exit; there is no need to
5771      introduce a new computation (that might also need casting the
5772      variable to unsigned and back).  */
5773   if (cand->pos == IP_ORIGINAL
5774       && cand->incremented_at == use->stmt)
5775     {
5776       tree step, ctype, utype;
5777       enum tree_code incr_code = PLUS_EXPR, old_code;
5778
5779       gcc_assert (is_gimple_assign (use->stmt));
5780       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5781
5782       step = cand->iv->step;
5783       ctype = TREE_TYPE (step);
5784       utype = TREE_TYPE (cand->var_after);
5785       if (TREE_CODE (step) == NEGATE_EXPR)
5786         {
5787           incr_code = MINUS_EXPR;
5788           step = TREE_OPERAND (step, 0);
5789         }
5790
5791       /* Check whether we may leave the computation unchanged.
5792          This is the case only if it does not rely on other
5793          computations in the loop -- otherwise, the computation
5794          we rely upon may be removed in remove_unused_ivs,
5795          thus leading to ICE.  */
5796       old_code = gimple_assign_rhs_code (use->stmt);
5797       if (old_code == PLUS_EXPR
5798           || old_code == MINUS_EXPR
5799           || old_code == POINTER_PLUS_EXPR)
5800         {
5801           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5802             op = gimple_assign_rhs2 (use->stmt);
5803           else if (old_code != MINUS_EXPR
5804                    && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5805             op = gimple_assign_rhs1 (use->stmt);
5806           else
5807             op = NULL_TREE;
5808         }
5809       else
5810         op = NULL_TREE;
5811
5812       if (op
5813           && (TREE_CODE (op) == INTEGER_CST
5814               || operand_equal_p (op, step, 0)))
5815         return;
5816
5817       /* Otherwise, add the necessary computations to express
5818          the iv.  */
5819       op = fold_convert (ctype, cand->var_before);
5820       comp = fold_convert (utype,
5821                            build2 (incr_code, ctype, op,
5822                                    unshare_expr (step)));
5823     }
5824   else
5825     {
5826       comp = get_computation (data->current_loop, use, cand);
5827       gcc_assert (comp != NULL_TREE);
5828     }
5829
5830   switch (gimple_code (use->stmt))
5831     {
5832     case GIMPLE_PHI:
5833       tgt = PHI_RESULT (use->stmt);
5834
5835       /* If we should keep the biv, do not replace it.  */
5836       if (name_info (data, tgt)->preserve_biv)
5837         return;
5838
5839       bsi = gsi_after_labels (gimple_bb (use->stmt));
5840       break;
5841
5842     case GIMPLE_ASSIGN:
5843       tgt = gimple_assign_lhs (use->stmt);
5844       bsi = gsi_for_stmt (use->stmt);
5845       break;
5846
5847     default:
5848       gcc_unreachable ();
5849     }
5850
5851   if (!valid_gimple_rhs_p (comp)
5852       || (gimple_code (use->stmt) != GIMPLE_PHI
5853           /* We can't allow re-allocating the stmt as it might be pointed
5854              to still.  */
5855           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5856               >= gimple_num_ops (gsi_stmt (bsi)))))
5857     {
5858       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5859                                        true, GSI_SAME_STMT);
5860       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5861         duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5862     }
5863
5864   if (gimple_code (use->stmt) == GIMPLE_PHI)
5865     {
5866       ass = gimple_build_assign (tgt, comp);
5867       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5868
5869       bsi = gsi_for_stmt (use->stmt);
5870       remove_phi_node (&bsi, false);
5871     }
5872   else
5873     {
5874       gimple_assign_set_rhs_from_tree (&bsi, comp);
5875       use->stmt = gsi_stmt (bsi);
5876     }
5877 }
5878
5879 /* Copies the reference information from OLD_REF to NEW_REF.  */
5880
5881 static void
5882 copy_ref_info (tree new_ref, tree old_ref)
5883 {
5884   tree new_ptr_base = NULL_TREE;
5885
5886   TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5887   TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5888
5889   if (TREE_CODE (new_ref) == TARGET_MEM_REF)
5890     new_ptr_base = TMR_BASE (new_ref);
5891   else if (TREE_CODE (new_ref) == MEM_REF)
5892     new_ptr_base = TREE_OPERAND (new_ref, 0);
5893
5894   /* We can transfer points-to information from an old pointer
5895      or decl base to the new one.  */
5896   if (new_ptr_base
5897       && TREE_CODE (new_ptr_base) == SSA_NAME
5898       && POINTER_TYPE_P (TREE_TYPE (new_ptr_base))
5899       && !SSA_NAME_PTR_INFO (new_ptr_base))
5900     {
5901       tree base = get_base_address (old_ref);
5902       if (!base)
5903         ;
5904       else if ((INDIRECT_REF_P (base)
5905                 || TREE_CODE (base) == MEM_REF)
5906                && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
5907         duplicate_ssa_name_ptr_info
5908           (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5909       else if (TREE_CODE (base) == VAR_DECL
5910                || TREE_CODE (base) == PARM_DECL
5911                || TREE_CODE (base) == RESULT_DECL)
5912         {
5913           struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5914           pt_solution_set_var (&pi->pt, base);
5915         }
5916     }
5917 }
5918
5919 /* Performs a peephole optimization to reorder the iv update statement with
5920    a mem ref to enable instruction combining in later phases. The mem ref uses
5921    the iv value before the update, so the reordering transformation requires
5922    adjustment of the offset. CAND is the selected IV_CAND.
5923
5924    Example:
5925
5926    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
5927    iv2 = iv1 + 1;
5928
5929    if (t < val)      (1)
5930      goto L;
5931    goto Head;
5932
5933
5934    directly propagating t over to (1) will introduce overlapping live range
5935    thus increase register pressure. This peephole transform it into:
5936
5937
5938    iv2 = iv1 + 1;
5939    t = MEM_REF (base, iv2, 8, 8);
5940    if (t < val)
5941      goto L;
5942    goto Head;
5943 */
5944
5945 static void
5946 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
5947 {
5948   tree var_after;
5949   gimple iv_update, stmt;
5950   basic_block bb;
5951   gimple_stmt_iterator gsi, gsi_iv;
5952
5953   if (cand->pos != IP_NORMAL)
5954     return;
5955
5956   var_after = cand->var_after;
5957   iv_update = SSA_NAME_DEF_STMT (var_after);
5958
5959   bb = gimple_bb (iv_update);
5960   gsi = gsi_last_nondebug_bb (bb);
5961   stmt = gsi_stmt (gsi);
5962
5963   /* Only handle conditional statement for now.  */
5964   if (gimple_code (stmt) != GIMPLE_COND)
5965     return;
5966
5967   gsi_prev_nondebug (&gsi);
5968   stmt = gsi_stmt (gsi);
5969   if (stmt != iv_update)
5970     return;
5971
5972   gsi_prev_nondebug (&gsi);
5973   if (gsi_end_p (gsi))
5974     return;
5975
5976   stmt = gsi_stmt (gsi);
5977   if (gimple_code (stmt) != GIMPLE_ASSIGN)
5978     return;
5979
5980   if (stmt != use->stmt)
5981     return;
5982
5983   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5984     return;
5985
5986   if (dump_file && (dump_flags & TDF_DETAILS))
5987     {
5988       fprintf (dump_file, "Reordering \n");
5989       print_gimple_stmt (dump_file, iv_update, 0, 0);
5990       print_gimple_stmt (dump_file, use->stmt, 0, 0);
5991       fprintf (dump_file, "\n");
5992     }
5993
5994   gsi = gsi_for_stmt (use->stmt);
5995   gsi_iv = gsi_for_stmt (iv_update);
5996   gsi_move_before (&gsi_iv, &gsi);
5997
5998   cand->pos = IP_BEFORE_USE;
5999   cand->incremented_at = use->stmt;
6000 }
6001
6002 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6003
6004 static void
6005 rewrite_use_address (struct ivopts_data *data,
6006                      struct iv_use *use, struct iv_cand *cand)
6007 {
6008   aff_tree aff;
6009   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6010   tree base_hint = NULL_TREE;
6011   tree ref, iv;
6012   bool ok;
6013
6014   adjust_iv_update_pos (cand, use);
6015   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6016   gcc_assert (ok);
6017   unshare_aff_combination (&aff);
6018
6019   /* To avoid undefined overflow problems, all IV candidates use unsigned
6020      integer types.  The drawback is that this makes it impossible for
6021      create_mem_ref to distinguish an IV that is based on a memory object
6022      from one that represents simply an offset.
6023
6024      To work around this problem, we pass a hint to create_mem_ref that
6025      indicates which variable (if any) in aff is an IV based on a memory
6026      object.  Note that we only consider the candidate.  If this is not
6027      based on an object, the base of the reference is in some subexpression
6028      of the use -- but these will use pointer types, so they are recognized
6029      by the create_mem_ref heuristics anyway.  */
6030   if (cand->iv->base_object)
6031     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6032
6033   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6034   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6035                         reference_alias_ptr_type (*use->op_p),
6036                         iv, base_hint, data->speed);
6037   copy_ref_info (ref, *use->op_p);
6038   *use->op_p = ref;
6039 }
6040
6041 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6042    candidate CAND.  */
6043
6044 static void
6045 rewrite_use_compare (struct ivopts_data *data,
6046                      struct iv_use *use, struct iv_cand *cand)
6047 {
6048   tree comp, *var_p, op, bound;
6049   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6050   enum tree_code compare;
6051   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6052   bool ok;
6053
6054   bound = cp->value;
6055   if (bound)
6056     {
6057       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6058       tree var_type = TREE_TYPE (var);
6059       gimple_seq stmts;
6060
6061       if (dump_file && (dump_flags & TDF_DETAILS))
6062         {
6063           fprintf (dump_file, "Replacing exit test: ");
6064           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6065         }
6066       compare = iv_elimination_compare (data, use);
6067       bound = unshare_expr (fold_convert (var_type, bound));
6068       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6069       if (stmts)
6070         gsi_insert_seq_on_edge_immediate (
6071                 loop_preheader_edge (data->current_loop),
6072                 stmts);
6073
6074       gimple_cond_set_lhs (use->stmt, var);
6075       gimple_cond_set_code (use->stmt, compare);
6076       gimple_cond_set_rhs (use->stmt, op);
6077       return;
6078     }
6079
6080   /* The induction variable elimination failed; just express the original
6081      giv.  */
6082   comp = get_computation (data->current_loop, use, cand);
6083   gcc_assert (comp != NULL_TREE);
6084
6085   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6086   gcc_assert (ok);
6087
6088   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6089                                      true, GSI_SAME_STMT);
6090 }
6091
6092 /* Rewrites USE using candidate CAND.  */
6093
6094 static void
6095 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6096 {
6097   switch (use->type)
6098     {
6099       case USE_NONLINEAR_EXPR:
6100         rewrite_use_nonlinear_expr (data, use, cand);
6101         break;
6102
6103       case USE_ADDRESS:
6104         rewrite_use_address (data, use, cand);
6105         break;
6106
6107       case USE_COMPARE:
6108         rewrite_use_compare (data, use, cand);
6109         break;
6110
6111       default:
6112         gcc_unreachable ();
6113     }
6114
6115   update_stmt (use->stmt);
6116 }
6117
6118 /* Rewrite the uses using the selected induction variables.  */
6119
6120 static void
6121 rewrite_uses (struct ivopts_data *data)
6122 {
6123   unsigned i;
6124   struct iv_cand *cand;
6125   struct iv_use *use;
6126
6127   for (i = 0; i < n_iv_uses (data); i++)
6128     {
6129       use = iv_use (data, i);
6130       cand = use->selected;
6131       gcc_assert (cand);
6132
6133       rewrite_use (data, use, cand);
6134     }
6135 }
6136
6137 /* Removes the ivs that are not used after rewriting.  */
6138
6139 static void
6140 remove_unused_ivs (struct ivopts_data *data)
6141 {
6142   unsigned j;
6143   bitmap_iterator bi;
6144   bitmap toremove = BITMAP_ALLOC (NULL);
6145
6146   /* Figure out an order in which to release SSA DEFs so that we don't
6147      release something that we'd have to propagate into a debug stmt
6148      afterwards.  */
6149   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6150     {
6151       struct version_info *info;
6152
6153       info = ver_info (data, j);
6154       if (info->iv
6155           && !integer_zerop (info->iv->step)
6156           && !info->inv_id
6157           && !info->iv->have_use_for
6158           && !info->preserve_biv)
6159         bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6160     }
6161
6162   release_defs_bitset (toremove);
6163
6164   BITMAP_FREE (toremove);
6165 }
6166
6167 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6168    for pointer_map_traverse.  */
6169
6170 static bool
6171 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6172                       void *data ATTRIBUTE_UNUSED)
6173 {
6174   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6175
6176   free (niter);
6177   return true;
6178 }
6179
6180 /* Frees data allocated by the optimization of a single loop.  */
6181
6182 static void
6183 free_loop_data (struct ivopts_data *data)
6184 {
6185   unsigned i, j;
6186   bitmap_iterator bi;
6187   tree obj;
6188
6189   if (data->niters)
6190     {
6191       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6192       pointer_map_destroy (data->niters);
6193       data->niters = NULL;
6194     }
6195
6196   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6197     {
6198       struct version_info *info;
6199
6200       info = ver_info (data, i);
6201       if (info->iv)
6202         free (info->iv);
6203       info->iv = NULL;
6204       info->has_nonlin_use = false;
6205       info->preserve_biv = false;
6206       info->inv_id = 0;
6207     }
6208   bitmap_clear (data->relevant);
6209   bitmap_clear (data->important_candidates);
6210
6211   for (i = 0; i < n_iv_uses (data); i++)
6212     {
6213       struct iv_use *use = iv_use (data, i);
6214
6215       free (use->iv);
6216       BITMAP_FREE (use->related_cands);
6217       for (j = 0; j < use->n_map_members; j++)
6218         if (use->cost_map[j].depends_on)
6219           BITMAP_FREE (use->cost_map[j].depends_on);
6220       free (use->cost_map);
6221       free (use);
6222     }
6223   VEC_truncate (iv_use_p, data->iv_uses, 0);
6224
6225   for (i = 0; i < n_iv_cands (data); i++)
6226     {
6227       struct iv_cand *cand = iv_cand (data, i);
6228
6229       if (cand->iv)
6230         free (cand->iv);
6231       if (cand->depends_on)
6232         BITMAP_FREE (cand->depends_on);
6233       free (cand);
6234     }
6235   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6236
6237   if (data->version_info_size < num_ssa_names)
6238     {
6239       data->version_info_size = 2 * num_ssa_names;
6240       free (data->version_info);
6241       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6242     }
6243
6244   data->max_inv_id = 0;
6245
6246   FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6247     SET_DECL_RTL (obj, NULL_RTX);
6248
6249   VEC_truncate (tree, decl_rtl_to_reset, 0);
6250
6251   htab_empty (data->inv_expr_tab);
6252   data->inv_expr_id = 0;
6253 }
6254
6255 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6256    loop tree.  */
6257
6258 static void
6259 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6260 {
6261   free_loop_data (data);
6262   free (data->version_info);
6263   BITMAP_FREE (data->relevant);
6264   BITMAP_FREE (data->important_candidates);
6265
6266   VEC_free (tree, heap, decl_rtl_to_reset);
6267   VEC_free (iv_use_p, heap, data->iv_uses);
6268   VEC_free (iv_cand_p, heap, data->iv_candidates);
6269   htab_delete (data->inv_expr_tab);
6270 }
6271
6272 /* Returns true if the loop body BODY includes any function calls.  */
6273
6274 static bool
6275 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6276 {
6277   gimple_stmt_iterator gsi;
6278   unsigned i;
6279
6280   for (i = 0; i < num_nodes; i++)
6281     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6282       {
6283         gimple stmt = gsi_stmt (gsi);
6284         if (is_gimple_call (stmt)
6285             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6286           return true;
6287       }
6288   return false;
6289 }
6290
6291 /* Optimizes the LOOP.  Returns true if anything changed.  */
6292
6293 static bool
6294 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6295 {
6296   bool changed = false;
6297   struct iv_ca *iv_ca;
6298   edge exit;
6299   basic_block *body;
6300
6301   gcc_assert (!data->niters);
6302   data->current_loop = loop;
6303   data->speed = optimize_loop_for_speed_p (loop);
6304
6305   if (dump_file && (dump_flags & TDF_DETAILS))
6306     {
6307       fprintf (dump_file, "Processing loop %d\n", loop->num);
6308
6309       exit = single_dom_exit (loop);
6310       if (exit)
6311         {
6312           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6313                    exit->src->index, exit->dest->index);
6314           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6315           fprintf (dump_file, "\n");
6316         }
6317
6318       fprintf (dump_file, "\n");
6319     }
6320
6321   body = get_loop_body (loop);
6322   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6323   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6324   free (body);
6325
6326   /* For each ssa name determines whether it behaves as an induction variable
6327      in some loop.  */
6328   if (!find_induction_variables (data))
6329     goto finish;
6330
6331   /* Finds interesting uses (item 1).  */
6332   find_interesting_uses (data);
6333   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6334     goto finish;
6335
6336   /* Finds candidates for the induction variables (item 2).  */
6337   find_iv_candidates (data);
6338
6339   /* Calculates the costs (item 3, part 1).  */
6340   determine_iv_costs (data);
6341   determine_use_iv_costs (data);
6342   determine_set_costs (data);
6343
6344   /* Find the optimal set of induction variables (item 3, part 2).  */
6345   iv_ca = find_optimal_iv_set (data);
6346   if (!iv_ca)
6347     goto finish;
6348   changed = true;
6349
6350   /* Create the new induction variables (item 4, part 1).  */
6351   create_new_ivs (data, iv_ca);
6352   iv_ca_free (&iv_ca);
6353
6354   /* Rewrite the uses (item 4, part 2).  */
6355   rewrite_uses (data);
6356
6357   /* Remove the ivs that are unused after rewriting.  */
6358   remove_unused_ivs (data);
6359
6360   /* We have changed the structure of induction variables; it might happen
6361      that definitions in the scev database refer to some of them that were
6362      eliminated.  */
6363   scev_reset ();
6364
6365 finish:
6366   free_loop_data (data);
6367
6368   return changed;
6369 }
6370
6371 /* Main entry point.  Optimizes induction variables in loops.  */
6372
6373 void
6374 tree_ssa_iv_optimize (void)
6375 {
6376   struct loop *loop;
6377   struct ivopts_data data;
6378   loop_iterator li;
6379
6380   tree_ssa_iv_optimize_init (&data);
6381
6382   /* Optimize the loops starting with the innermost ones.  */
6383   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6384     {
6385       if (dump_file && (dump_flags & TDF_DETAILS))
6386         flow_loop_dump (loop, dump_file, NULL, 1);
6387
6388       tree_ssa_iv_optimize_loop (&data, loop);
6389     }
6390
6391   tree_ssa_iv_optimize_finalize (&data);
6392 }