gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "tm_p.h"
  71 #include "basic-block.h"
  72 #include "output.h"
  73 #include "tree-pretty-print.h"
  74 #include "gimple-pretty-print.h"
  75 #include "tree-flow.h"
  76 #include "tree-dump.h"
  77 #include "timevar.h"
  78 #include "cfgloop.h"
  79 #include "tree-pass.h"
  80 #include "ggc.h"
  81 #include "insn-config.h"
  82 #include "recog.h"
  83 #include "pointer-set.h"
  84 #include "hashtab.h"
  85 #include "tree-chrec.h"
  86 #include "tree-scalar-evolution.h"
  87 #include "cfgloop.h"
  88 #include "params.h"
  89 #include "langhooks.h"
  90 #include "tree-affine.h"
  91 #include "target.h"
  92 #include "tree-inline.h"
  93 #include "tree-ssa-propagate.h"
  94
  95 /* FIXME: Expressions are expanded to RTL in this pass to determine the
  96    cost of different addressing modes.  This should be moved to a TBD
  97    interface between the GIMPLE and RTL worlds.  */
  98 #include "expr.h"
  99
 100 /* The infinite cost.  */
 101 #define INFTY 10000000
 102
 103 #define AVG_LOOP_NITER(LOOP) 5
 104
 105 /* Returns the expected number of loop iterations for LOOP.
 106    The average trip count is computed from profile data if it
 107    exists. */
 108
 109 static inline HOST_WIDE_INT
 110 avg_loop_niter (struct loop *loop)
 111 {
 112   HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
 113   if (niter == -1)
 114     return AVG_LOOP_NITER (loop);
 115
 116   return niter;
 117 }
 118
 119 /* Representation of the induction variable.  */
 120 struct iv
 121 {
 122   tree base;            /* Initial value of the iv.  */
 123   tree base_object;     /* A memory object to that the induction variable points.  */
 124   tree step;            /* Step of the iv (constant only).  */
 125   tree ssa_name;        /* The ssa name with the value.  */
 126   bool biv_p;           /* Is it a biv?  */
 127   bool have_use_for;    /* Do we already have a use for it?  */
 128   unsigned use_id;      /* The identifier in the use if it is the case.  */
 129 };
 130
 131 /* Per-ssa version information (induction variable descriptions, etc.).  */
 132 struct version_info
 133 {
 134   tree name;            /* The ssa name.  */
 135   struct iv *iv;        /* Induction variable description.  */
 136   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 137                            an expression that is not an induction variable.  */
 138   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 139   unsigned inv_id;      /* Id of an invariant.  */
 140 };
 141
 142 /* Types of uses.  */
 143 enum use_type
 144 {
 145   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 146   USE_ADDRESS,          /* Use in an address.  */
 147   USE_COMPARE           /* Use is a compare.  */
 148 };
 149
 150 /* Cost of a computation.  */
 151 typedef struct
 152 {
 153   int cost;             /* The runtime cost.  */
 154   unsigned complexity;  /* The estimate of the complexity of the code for
 155                            the computation (in no concrete units --
 156                            complexity field should be larger for more
 157                            complex expressions and addressing modes).  */
 158 } comp_cost;
 159
 160 static const comp_cost zero_cost = {0, 0};
 161 static const comp_cost infinite_cost = {INFTY, INFTY};
 162
 163 /* The candidate - cost pair.  */
 164 struct cost_pair
 165 {
 166   struct iv_cand *cand; /* The candidate.  */
 167   comp_cost cost;       /* The cost.  */
 168   bitmap depends_on;    /* The list of invariants that have to be
 169                            preserved.  */
 170   tree value;           /* For final value elimination, the expression for
 171                            the final value of the iv.  For iv elimination,
 172                            the new bound to compare with.  */
 173   int inv_expr_id;      /* Loop invariant expression id.  */
 174 };
 175
 176 /* Use.  */
 177 struct iv_use
 178 {
 179   unsigned id;          /* The id of the use.  */
 180   enum use_type type;   /* Type of the use.  */
 181   struct iv *iv;        /* The induction variable it is based on.  */
 182   gimple stmt;          /* Statement in that it occurs.  */
 183   tree *op_p;           /* The place where it occurs.  */
 184   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 185                            important ones.  */
 186
 187   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 188   struct cost_pair *cost_map;
 189                         /* The costs wrto the iv candidates.  */
 190
 191   struct iv_cand *selected;
 192                         /* The selected candidate.  */
 193 };
 194
 195 /* The position where the iv is computed.  */
 196 enum iv_position
 197 {
 198   IP_NORMAL,            /* At the end, just before the exit condition.  */
 199   IP_END,               /* At the end of the latch block.  */
 200   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 201   IP_AFTER_USE,         /* Immediately after a specific use.  */
 202   IP_ORIGINAL           /* The original biv.  */
 203 };
 204
 205 /* The induction variable candidate.  */
 206 struct iv_cand
 207 {
 208   unsigned id;          /* The number of the candidate.  */
 209   bool important;       /* Whether this is an "important" candidate, i.e. such
 210                            that it should be considered by all uses.  */
 211   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 212   gimple incremented_at;/* For original biv, the statement where it is
 213                            incremented.  */
 214   tree var_before;      /* The variable used for it before increment.  */
 215   tree var_after;       /* The variable used for it after increment.  */
 216   struct iv *iv;        /* The value of the candidate.  NULL for
 217                            "pseudocandidate" used to indicate the possibility
 218                            to replace the final value of an iv by direct
 219                            computation of the value.  */
 220   unsigned cost;        /* Cost of the candidate.  */
 221   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 222   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 223                               where it is incremented.  */
 224   bitmap depends_on;    /* The list of invariants that are used in step of the
 225                            biv.  */
 226 };
 227
 228 /* Loop invariant expression hashtable entry.  */
 229 struct iv_inv_expr_ent
 230 {
 231   tree expr;
 232   int id;
 233   hashval_t hash;
 234 };
 235
 236 /* The data used by the induction variable optimizations.  */
 237
 238 typedef struct iv_use *iv_use_p;
 239 DEF_VEC_P(iv_use_p);
 240 DEF_VEC_ALLOC_P(iv_use_p,heap);
 241
 242 typedef struct iv_cand *iv_cand_p;
 243 DEF_VEC_P(iv_cand_p);
 244 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 245
 246 struct ivopts_data
 247 {
 248   /* The currently optimized loop.  */
 249   struct loop *current_loop;
 250
 251   /* Numbers of iterations for all exits of the current loop.  */
 252   struct pointer_map_t *niters;
 253
 254   /* Number of registers used in it.  */
 255   unsigned regs_used;
 256
 257   /* The size of version_info array allocated.  */
 258   unsigned version_info_size;
 259
 260   /* The array of information for the ssa names.  */
 261   struct version_info *version_info;
 262
 263   /* The hashtable of loop invariant expressions created
 264      by ivopt.  */
 265   htab_t inv_expr_tab;
 266
 267   /* Loop invariant expression id.  */
 268   int inv_expr_id;
 269
 270   /* The bitmap of indices in version_info whose value was changed.  */
 271   bitmap relevant;
 272
 273   /* The uses of induction variables.  */
 274   VEC(iv_use_p,heap) *iv_uses;
 275
 276   /* The candidates.  */
 277   VEC(iv_cand_p,heap) *iv_candidates;
 278
 279   /* A bitmap of important candidates.  */
 280   bitmap important_candidates;
 281
 282   /* The maximum invariant id.  */
 283   unsigned max_inv_id;
 284
 285   /* Whether to consider just related and important candidates when replacing a
 286      use.  */
 287   bool consider_all_candidates;
 288
 289   /* Are we optimizing for speed?  */
 290   bool speed;
 291
 292   /* Whether the loop body includes any function calls.  */
 293   bool body_includes_call;
 294 };
 295
 296 /* An assignment of iv candidates to uses.  */
 297
 298 struct iv_ca
 299 {
 300   /* The number of uses covered by the assignment.  */
 301   unsigned upto;
 302
 303   /* Number of uses that cannot be expressed by the candidates in the set.  */
 304   unsigned bad_uses;
 305
 306   /* Candidate assigned to a use, together with the related costs.  */
 307   struct cost_pair **cand_for_use;
 308
 309   /* Number of times each candidate is used.  */
 310   unsigned *n_cand_uses;
 311
 312   /* The candidates used.  */
 313   bitmap cands;
 314
 315   /* The number of candidates in the set.  */
 316   unsigned n_cands;
 317
 318   /* Total number of registers needed.  */
 319   unsigned n_regs;
 320
 321   /* Total cost of expressing uses.  */
 322   comp_cost cand_use_cost;
 323
 324   /* Total cost of candidates.  */
 325   unsigned cand_cost;
 326
 327   /* Number of times each invariant is used.  */
 328   unsigned *n_invariant_uses;
 329
 330   /* The array holding the number of uses of each loop
 331      invariant expressions created by ivopt.  */
 332   unsigned *used_inv_expr;
 333
 334   /* The number of created loop invariants.  */
 335   unsigned num_used_inv_expr;
 336
 337   /* Total cost of the assignment.  */
 338   comp_cost cost;
 339 };
 340
 341 /* Difference of two iv candidate assignments.  */
 342
 343 struct iv_ca_delta
 344 {
 345   /* Changed use.  */
 346   struct iv_use *use;
 347
 348   /* An old assignment (for rollback purposes).  */
 349   struct cost_pair *old_cp;
 350
 351   /* A new assignment.  */
 352   struct cost_pair *new_cp;
 353
 354   /* Next change in the list.  */
 355   struct iv_ca_delta *next_change;
 356 };
 357
 358 /* Bound on number of candidates below that all candidates are considered.  */
 359
 360 #define CONSIDER_ALL_CANDIDATES_BOUND \
 361   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 362
 363 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 364    optimizing such a loop would help, and it would take ages).  */
 365
 366 #define MAX_CONSIDERED_USES \
 367   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 368
 369 /* If there are at most this number of ivs in the set, try removing unnecessary
 370    ivs from the set always.  */
 371
 372 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 373   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 374
 375 /* The list of trees for that the decl_rtl field must be reset is stored
 376    here.  */
 377
 378 static VEC(tree,heap) *decl_rtl_to_reset;
 379
 380 /* Number of uses recorded in DATA.  */
 381
 382 static inline unsigned
 383 n_iv_uses (struct ivopts_data *data)
 384 {
 385   return VEC_length (iv_use_p, data->iv_uses);
 386 }
 387
 388 /* Ith use recorded in DATA.  */
 389
 390 static inline struct iv_use *
 391 iv_use (struct ivopts_data *data, unsigned i)
 392 {
 393   return VEC_index (iv_use_p, data->iv_uses, i);
 394 }
 395
 396 /* Number of candidates recorded in DATA.  */
 397
 398 static inline unsigned
 399 n_iv_cands (struct ivopts_data *data)
 400 {
 401   return VEC_length (iv_cand_p, data->iv_candidates);
 402 }
 403
 404 /* Ith candidate recorded in DATA.  */
 405
 406 static inline struct iv_cand *
 407 iv_cand (struct ivopts_data *data, unsigned i)
 408 {
 409   return VEC_index (iv_cand_p, data->iv_candidates, i);
 410 }
 411
 412 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 413
 414 edge
 415 single_dom_exit (struct loop *loop)
 416 {
 417   edge exit = single_exit (loop);
 418
 419   if (!exit)
 420     return NULL;
 421
 422   if (!just_once_each_iteration_p (loop, exit->src))
 423     return NULL;
 424
 425   return exit;
 426 }
 427
 428 /* Dumps information about the induction variable IV to FILE.  */
 429
 430 extern void dump_iv (FILE *, struct iv *);
 431 void
 432 dump_iv (FILE *file, struct iv *iv)
 433 {
 434   if (iv->ssa_name)
 435     {
 436       fprintf (file, "ssa name ");
 437       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 438       fprintf (file, "\n");
 439     }
 440
 441   fprintf (file, "  type ");
 442   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 443   fprintf (file, "\n");
 444
 445   if (iv->step)
 446     {
 447       fprintf (file, "  base ");
 448       print_generic_expr (file, iv->base, TDF_SLIM);
 449       fprintf (file, "\n");
 450
 451       fprintf (file, "  step ");
 452       print_generic_expr (file, iv->step, TDF_SLIM);
 453       fprintf (file, "\n");
 454     }
 455   else
 456     {
 457       fprintf (file, "  invariant ");
 458       print_generic_expr (file, iv->base, TDF_SLIM);
 459       fprintf (file, "\n");
 460     }
 461
 462   if (iv->base_object)
 463     {
 464       fprintf (file, "  base object ");
 465       print_generic_expr (file, iv->base_object, TDF_SLIM);
 466       fprintf (file, "\n");
 467     }
 468
 469   if (iv->biv_p)
 470     fprintf (file, "  is a biv\n");
 471 }
 472
 473 /* Dumps information about the USE to FILE.  */
 474
 475 extern void dump_use (FILE *, struct iv_use *);
 476 void
 477 dump_use (FILE *file, struct iv_use *use)
 478 {
 479   fprintf (file, "use %d\n", use->id);
 480
 481   switch (use->type)
 482     {
 483     case USE_NONLINEAR_EXPR:
 484       fprintf (file, "  generic\n");
 485       break;
 486
 487     case USE_ADDRESS:
 488       fprintf (file, "  address\n");
 489       break;
 490
 491     case USE_COMPARE:
 492       fprintf (file, "  compare\n");
 493       break;
 494
 495     default:
 496       gcc_unreachable ();
 497     }
 498
 499   fprintf (file, "  in statement ");
 500   print_gimple_stmt (file, use->stmt, 0, 0);
 501   fprintf (file, "\n");
 502
 503   fprintf (file, "  at position ");
 504   if (use->op_p)
 505     print_generic_expr (file, *use->op_p, TDF_SLIM);
 506   fprintf (file, "\n");
 507
 508   dump_iv (file, use->iv);
 509
 510   if (use->related_cands)
 511     {
 512       fprintf (file, "  related candidates ");
 513       dump_bitmap (file, use->related_cands);
 514     }
 515 }
 516
 517 /* Dumps information about the uses to FILE.  */
 518
 519 extern void dump_uses (FILE *, struct ivopts_data *);
 520 void
 521 dump_uses (FILE *file, struct ivopts_data *data)
 522 {
 523   unsigned i;
 524   struct iv_use *use;
 525
 526   for (i = 0; i < n_iv_uses (data); i++)
 527     {
 528       use = iv_use (data, i);
 529
 530       dump_use (file, use);
 531       fprintf (file, "\n");
 532     }
 533 }
 534
 535 /* Dumps information about induction variable candidate CAND to FILE.  */
 536
 537 extern void dump_cand (FILE *, struct iv_cand *);
 538 void
 539 dump_cand (FILE *file, struct iv_cand *cand)
 540 {
 541   struct iv *iv = cand->iv;
 542
 543   fprintf (file, "candidate %d%s\n",
 544            cand->id, cand->important ? " (important)" : "");
 545
 546   if (cand->depends_on)
 547     {
 548       fprintf (file, "  depends on ");
 549       dump_bitmap (file, cand->depends_on);
 550     }
 551
 552   if (!iv)
 553     {
 554       fprintf (file, "  final value replacement\n");
 555       return;
 556     }
 557
 558   if (cand->var_before)
 559     {
 560       fprintf (file, "  var_before ");
 561       print_generic_expr (file, cand->var_before, TDF_SLIM);
 562       fprintf (file, "\n");
 563     }
 564   if (cand->var_after)
 565     {
 566       fprintf (file, "  var_after ");
 567       print_generic_expr (file, cand->var_after, TDF_SLIM);
 568       fprintf (file, "\n");
 569     }
 570
 571   switch (cand->pos)
 572     {
 573     case IP_NORMAL:
 574       fprintf (file, "  incremented before exit test\n");
 575       break;
 576
 577     case IP_BEFORE_USE:
 578       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 579       break;
 580
 581     case IP_AFTER_USE:
 582       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 583       break;
 584
 585     case IP_END:
 586       fprintf (file, "  incremented at end\n");
 587       break;
 588
 589     case IP_ORIGINAL:
 590       fprintf (file, "  original biv\n");
 591       break;
 592     }
 593
 594   dump_iv (file, iv);
 595 }
 596
 597 /* Returns the info for ssa version VER.  */
 598
 599 static inline struct version_info *
 600 ver_info (struct ivopts_data *data, unsigned ver)
 601 {
 602   return data->version_info + ver;
 603 }
 604
 605 /* Returns the info for ssa name NAME.  */
 606
 607 static inline struct version_info *
 608 name_info (struct ivopts_data *data, tree name)
 609 {
 610   return ver_info (data, SSA_NAME_VERSION (name));
 611 }
 612
 613 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 614    emitted in LOOP.  */
 615
 616 static bool
 617 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 618 {
 619   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 620
 621   gcc_assert (bb);
 622
 623   if (sbb == loop->latch)
 624     return true;
 625
 626   if (sbb != bb)
 627     return false;
 628
 629   return stmt == last_stmt (bb);
 630 }
 631
 632 /* Returns true if STMT if after the place where the original induction
 633    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 634    if the positions are identical.  */
 635
 636 static bool
 637 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 638 {
 639   basic_block cand_bb = gimple_bb (cand->incremented_at);
 640   basic_block stmt_bb = gimple_bb (stmt);
 641
 642   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 643     return false;
 644
 645   if (stmt_bb != cand_bb)
 646     return true;
 647
 648   if (true_if_equal
 649       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 650     return true;
 651   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 652 }
 653
 654 /* Returns true if STMT if after the place where the induction variable
 655    CAND is incremented in LOOP.  */
 656
 657 static bool
 658 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 659 {
 660   switch (cand->pos)
 661     {
 662     case IP_END:
 663       return false;
 664
 665     case IP_NORMAL:
 666       return stmt_after_ip_normal_pos (loop, stmt);
 667
 668     case IP_ORIGINAL:
 669     case IP_AFTER_USE:
 670       return stmt_after_inc_pos (cand, stmt, false);
 671
 672     case IP_BEFORE_USE:
 673       return stmt_after_inc_pos (cand, stmt, true);
 674
 675     default:
 676       gcc_unreachable ();
 677     }
 678 }
 679
 680 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 681
 682 static bool
 683 abnormal_ssa_name_p (tree exp)
 684 {
 685   if (!exp)
 686     return false;
 687
 688   if (TREE_CODE (exp) != SSA_NAME)
 689     return false;
 690
 691   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 692 }
 693
 694 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 695    abnormal phi node.  Callback for for_each_index.  */
 696
 697 static bool
 698 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 699                                   void *data ATTRIBUTE_UNUSED)
 700 {
 701   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 702     {
 703       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 704         return false;
 705       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 706         return false;
 707     }
 708
 709   return !abnormal_ssa_name_p (*index);
 710 }
 711
 712 /* Returns true if EXPR contains a ssa name that occurs in an
 713    abnormal phi node.  */
 714
 715 bool
 716 contains_abnormal_ssa_name_p (tree expr)
 717 {
 718   enum tree_code code;
 719   enum tree_code_class codeclass;
 720
 721   if (!expr)
 722     return false;
 723
 724   code = TREE_CODE (expr);
 725   codeclass = TREE_CODE_CLASS (code);
 726
 727   if (code == SSA_NAME)
 728     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 729
 730   if (code == INTEGER_CST
 731       || is_gimple_min_invariant (expr))
 732     return false;
 733
 734   if (code == ADDR_EXPR)
 735     return !for_each_index (&TREE_OPERAND (expr, 0),
 736                             idx_contains_abnormal_ssa_name_p,
 737                             NULL);
 738
 739   if (code == COND_EXPR)
 740     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 741       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 742       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 743
 744   switch (codeclass)
 745     {
 746     case tcc_binary:
 747     case tcc_comparison:
 748       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 749         return true;
 750
 751       /* Fallthru.  */
 752     case tcc_unary:
 753       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 754         return true;
 755
 756       break;
 757
 758     default:
 759       gcc_unreachable ();
 760     }
 761
 762   return false;
 763 }
 764
 765 /*  Returns tree describing number of iterations determined from
 766     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 767
 768 static tree
 769 niter_for_exit (struct ivopts_data *data, edge exit,
 770                 struct tree_niter_desc **desc_p)
 771 {
 772   struct tree_niter_desc* desc = NULL;
 773   tree niter;
 774   void **slot;
 775
 776   if (!data->niters)
 777     {
 778       data->niters = pointer_map_create ();
 779       slot = NULL;
 780     }
 781   else
 782     slot = pointer_map_contains (data->niters, exit);
 783
 784   if (!slot)
 785     {
 786       /* Try to determine number of iterations.  We must know it
 787          unconditionally (i.e., without possibility of # of iterations
 788          being zero).  Also, we cannot safely work with ssa names that
 789          appear in phi nodes on abnormal edges, so that we do not create
 790          overlapping life ranges for them (PR 27283).  */
 791       desc = XNEW (struct tree_niter_desc);
 792       if (number_of_iterations_exit (data->current_loop,
 793                                      exit, desc, true)
 794           && integer_zerop (desc->may_be_zero)
 795           && !contains_abnormal_ssa_name_p (desc->niter))
 796         niter = desc->niter;
 797       else
 798         niter = NULL_TREE;
 799
 800       desc->niter = niter;
 801       slot = pointer_map_insert (data->niters, exit);
 802       *slot = desc;
 803     }
 804   else
 805     niter = ((struct tree_niter_desc *) *slot)->niter;
 806
 807   if (desc_p)
 808     *desc_p = (struct tree_niter_desc *) *slot;
 809   return niter;
 810 }
 811
 812 /* Returns tree describing number of iterations determined from
 813    single dominating exit of DATA->current_loop, or NULL if something
 814    goes wrong.  */
 815
 816 static tree
 817 niter_for_single_dom_exit (struct ivopts_data *data)
 818 {
 819   edge exit = single_dom_exit (data->current_loop);
 820
 821   if (!exit)
 822     return NULL;
 823
 824   return niter_for_exit (data, exit, NULL);
 825 }
 826
 827 /* Hash table equality function for expressions.  */
 828
 829 static int
 830 htab_inv_expr_eq (const void *ent1, const void *ent2)
 831 {
 832   const struct iv_inv_expr_ent *expr1 =
 833       (const struct iv_inv_expr_ent *)ent1;
 834   const struct iv_inv_expr_ent *expr2 =
 835       (const struct iv_inv_expr_ent *)ent2;
 836
 837   return expr1->hash == expr2->hash
 838          && operand_equal_p (expr1->expr, expr2->expr, 0);
 839 }
 840
 841 /* Hash function for loop invariant expressions.  */
 842
 843 static hashval_t
 844 htab_inv_expr_hash (const void *ent)
 845 {
 846   const struct iv_inv_expr_ent *expr =
 847       (const struct iv_inv_expr_ent *)ent;
 848   return expr->hash;
 849 }
 850
 851 /* Initializes data structures used by the iv optimization pass, stored
 852    in DATA.  */
 853
 854 static void
 855 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 856 {
 857   data->version_info_size = 2 * num_ssa_names;
 858   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 859   data->relevant = BITMAP_ALLOC (NULL);
 860   data->important_candidates = BITMAP_ALLOC (NULL);
 861   data->max_inv_id = 0;
 862   data->niters = NULL;
 863   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 864   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 865   data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
 866                                     htab_inv_expr_eq, free);
 867   data->inv_expr_id = 0;
 868   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 869 }
 870
 871 /* Returns a memory object to that EXPR points.  In case we are able to
 872    determine that it does not point to any such object, NULL is returned.  */
 873
 874 static tree
 875 determine_base_object (tree expr)
 876 {
 877   enum tree_code code = TREE_CODE (expr);
 878   tree base, obj;
 879
 880   /* If this is a pointer casted to any type, we need to determine
 881      the base object for the pointer; so handle conversions before
 882      throwing away non-pointer expressions.  */
 883   if (CONVERT_EXPR_P (expr))
 884     return determine_base_object (TREE_OPERAND (expr, 0));
 885
 886   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 887     return NULL_TREE;
 888
 889   switch (code)
 890     {
 891     case INTEGER_CST:
 892       return NULL_TREE;
 893
 894     case ADDR_EXPR:
 895       obj = TREE_OPERAND (expr, 0);
 896       base = get_base_address (obj);
 897
 898       if (!base)
 899         return expr;
 900
 901       if (TREE_CODE (base) == MEM_REF)
 902         return determine_base_object (TREE_OPERAND (base, 0));
 903
 904       return fold_convert (ptr_type_node,
 905                            build_fold_addr_expr (base));
 906
 907     case POINTER_PLUS_EXPR:
 908       return determine_base_object (TREE_OPERAND (expr, 0));
 909
 910     case PLUS_EXPR:
 911     case MINUS_EXPR:
 912       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 913       gcc_unreachable ();
 914
 915     default:
 916       return fold_convert (ptr_type_node, expr);
 917     }
 918 }
 919
 920 /* Allocates an induction variable with given initial value BASE and step STEP
 921    for loop LOOP.  */
 922
 923 static struct iv *
 924 alloc_iv (tree base, tree step)
 925 {
 926   struct iv *iv = XCNEW (struct iv);
 927   gcc_assert (step != NULL_TREE);
 928
 929   iv->base = base;
 930   iv->base_object = determine_base_object (base);
 931   iv->step = step;
 932   iv->biv_p = false;
 933   iv->have_use_for = false;
 934   iv->use_id = 0;
 935   iv->ssa_name = NULL_TREE;
 936
 937   return iv;
 938 }
 939
 940 /* Sets STEP and BASE for induction variable IV.  */
 941
 942 static void
 943 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 944 {
 945   struct version_info *info = name_info (data, iv);
 946
 947   gcc_assert (!info->iv);
 948
 949   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 950   info->iv = alloc_iv (base, step);
 951   info->iv->ssa_name = iv;
 952 }
 953
 954 /* Finds induction variable declaration for VAR.  */
 955
 956 static struct iv *
 957 get_iv (struct ivopts_data *data, tree var)
 958 {
 959   basic_block bb;
 960   tree type = TREE_TYPE (var);
 961
 962   if (!POINTER_TYPE_P (type)
 963       && !INTEGRAL_TYPE_P (type))
 964     return NULL;
 965
 966   if (!name_info (data, var)->iv)
 967     {
 968       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 969
 970       if (!bb
 971           || !flow_bb_inside_loop_p (data->current_loop, bb))
 972         set_iv (data, var, var, build_int_cst (type, 0));
 973     }
 974
 975   return name_info (data, var)->iv;
 976 }
 977
 978 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 979    not define a simple affine biv with nonzero step.  */
 980
 981 static tree
 982 determine_biv_step (gimple phi)
 983 {
 984   struct loop *loop = gimple_bb (phi)->loop_father;
 985   tree name = PHI_RESULT (phi);
 986   affine_iv iv;
 987
 988   if (!is_gimple_reg (name))
 989     return NULL_TREE;
 990
 991   if (!simple_iv (loop, loop, name, &iv, true))
 992     return NULL_TREE;
 993
 994   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 995 }
 996
 997 /* Finds basic ivs.  */
 998
 999 static bool
1000 find_bivs (struct ivopts_data *data)
1001 {
1002   gimple phi;
1003   tree step, type, base;
1004   bool found = false;
1005   struct loop *loop = data->current_loop;
1006   gimple_stmt_iterator psi;
1007
1008   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1009     {
1010       phi = gsi_stmt (psi);
1011
1012       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1013         continue;
1014
1015       step = determine_biv_step (phi);
1016       if (!step)
1017         continue;
1018
1019       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1020       base = expand_simple_operations (base);
1021       if (contains_abnormal_ssa_name_p (base)
1022           || contains_abnormal_ssa_name_p (step))
1023         continue;
1024
1025       type = TREE_TYPE (PHI_RESULT (phi));
1026       base = fold_convert (type, base);
1027       if (step)
1028         {
1029           if (POINTER_TYPE_P (type))
1030             step = fold_convert (sizetype, step);
1031           else
1032             step = fold_convert (type, step);
1033         }
1034
1035       set_iv (data, PHI_RESULT (phi), base, step);
1036       found = true;
1037     }
1038
1039   return found;
1040 }
1041
1042 /* Marks basic ivs.  */
1043
1044 static void
1045 mark_bivs (struct ivopts_data *data)
1046 {
1047   gimple phi;
1048   tree var;
1049   struct iv *iv, *incr_iv;
1050   struct loop *loop = data->current_loop;
1051   basic_block incr_bb;
1052   gimple_stmt_iterator psi;
1053
1054   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1055     {
1056       phi = gsi_stmt (psi);
1057
1058       iv = get_iv (data, PHI_RESULT (phi));
1059       if (!iv)
1060         continue;
1061
1062       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1063       incr_iv = get_iv (data, var);
1064       if (!incr_iv)
1065         continue;
1066
1067       /* If the increment is in the subloop, ignore it.  */
1068       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1069       if (incr_bb->loop_father != data->current_loop
1070           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1071         continue;
1072
1073       iv->biv_p = true;
1074       incr_iv->biv_p = true;
1075     }
1076 }
1077
1078 /* Checks whether STMT defines a linear induction variable and stores its
1079    parameters to IV.  */
1080
1081 static bool
1082 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1083 {
1084   tree lhs;
1085   struct loop *loop = data->current_loop;
1086
1087   iv->base = NULL_TREE;
1088   iv->step = NULL_TREE;
1089
1090   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1091     return false;
1092
1093   lhs = gimple_assign_lhs (stmt);
1094   if (TREE_CODE (lhs) != SSA_NAME)
1095     return false;
1096
1097   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1098     return false;
1099   iv->base = expand_simple_operations (iv->base);
1100
1101   if (contains_abnormal_ssa_name_p (iv->base)
1102       || contains_abnormal_ssa_name_p (iv->step))
1103     return false;
1104
1105   return true;
1106 }
1107
1108 /* Finds general ivs in statement STMT.  */
1109
1110 static void
1111 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1112 {
1113   affine_iv iv;
1114
1115   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1116     return;
1117
1118   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1119 }
1120
1121 /* Finds general ivs in basic block BB.  */
1122
1123 static void
1124 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1125 {
1126   gimple_stmt_iterator bsi;
1127
1128   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1129     find_givs_in_stmt (data, gsi_stmt (bsi));
1130 }
1131
1132 /* Finds general ivs.  */
1133
1134 static void
1135 find_givs (struct ivopts_data *data)
1136 {
1137   struct loop *loop = data->current_loop;
1138   basic_block *body = get_loop_body_in_dom_order (loop);
1139   unsigned i;
1140
1141   for (i = 0; i < loop->num_nodes; i++)
1142     find_givs_in_bb (data, body[i]);
1143   free (body);
1144 }
1145
1146 /* For each ssa name defined in LOOP determines whether it is an induction
1147    variable and if so, its initial value and step.  */
1148
1149 static bool
1150 find_induction_variables (struct ivopts_data *data)
1151 {
1152   unsigned i;
1153   bitmap_iterator bi;
1154
1155   if (!find_bivs (data))
1156     return false;
1157
1158   find_givs (data);
1159   mark_bivs (data);
1160
1161   if (dump_file && (dump_flags & TDF_DETAILS))
1162     {
1163       tree niter = niter_for_single_dom_exit (data);
1164
1165       if (niter)
1166         {
1167           fprintf (dump_file, "  number of iterations ");
1168           print_generic_expr (dump_file, niter, TDF_SLIM);
1169           fprintf (dump_file, "\n\n");
1170         };
1171
1172       fprintf (dump_file, "Induction variables:\n\n");
1173
1174       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1175         {
1176           if (ver_info (data, i)->iv)
1177             dump_iv (dump_file, ver_info (data, i)->iv);
1178         }
1179     }
1180
1181   return true;
1182 }
1183
1184 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1185
1186 static struct iv_use *
1187 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1188             gimple stmt, enum use_type use_type)
1189 {
1190   struct iv_use *use = XCNEW (struct iv_use);
1191
1192   use->id = n_iv_uses (data);
1193   use->type = use_type;
1194   use->iv = iv;
1195   use->stmt = stmt;
1196   use->op_p = use_p;
1197   use->related_cands = BITMAP_ALLOC (NULL);
1198
1199   /* To avoid showing ssa name in the dumps, if it was not reset by the
1200      caller.  */
1201   iv->ssa_name = NULL_TREE;
1202
1203   if (dump_file && (dump_flags & TDF_DETAILS))
1204     dump_use (dump_file, use);
1205
1206   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1207
1208   return use;
1209 }
1210
1211 /* Checks whether OP is a loop-level invariant and if so, records it.
1212    NONLINEAR_USE is true if the invariant is used in a way we do not
1213    handle specially.  */
1214
1215 static void
1216 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1217 {
1218   basic_block bb;
1219   struct version_info *info;
1220
1221   if (TREE_CODE (op) != SSA_NAME
1222       || !is_gimple_reg (op))
1223     return;
1224
1225   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1226   if (bb
1227       && flow_bb_inside_loop_p (data->current_loop, bb))
1228     return;
1229
1230   info = name_info (data, op);
1231   info->name = op;
1232   info->has_nonlin_use |= nonlinear_use;
1233   if (!info->inv_id)
1234     info->inv_id = ++data->max_inv_id;
1235   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1236 }
1237
1238 /* Checks whether the use OP is interesting and if so, records it.  */
1239
1240 static struct iv_use *
1241 find_interesting_uses_op (struct ivopts_data *data, tree op)
1242 {
1243   struct iv *iv;
1244   struct iv *civ;
1245   gimple stmt;
1246   struct iv_use *use;
1247
1248   if (TREE_CODE (op) != SSA_NAME)
1249     return NULL;
1250
1251   iv = get_iv (data, op);
1252   if (!iv)
1253     return NULL;
1254
1255   if (iv->have_use_for)
1256     {
1257       use = iv_use (data, iv->use_id);
1258
1259       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1260       return use;
1261     }
1262
1263   if (integer_zerop (iv->step))
1264     {
1265       record_invariant (data, op, true);
1266       return NULL;
1267     }
1268   iv->have_use_for = true;
1269
1270   civ = XNEW (struct iv);
1271   *civ = *iv;
1272
1273   stmt = SSA_NAME_DEF_STMT (op);
1274   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1275               || is_gimple_assign (stmt));
1276
1277   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1278   iv->use_id = use->id;
1279
1280   return use;
1281 }
1282
1283 /* Given a condition in statement STMT, checks whether it is a compare
1284    of an induction variable and an invariant.  If this is the case,
1285    CONTROL_VAR is set to location of the iv, BOUND to the location of
1286    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1287    induction variable descriptions, and true is returned.  If this is not
1288    the case, CONTROL_VAR and BOUND are set to the arguments of the
1289    condition and false is returned.  */
1290
1291 static bool
1292 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1293                        tree **control_var, tree **bound,
1294                        struct iv **iv_var, struct iv **iv_bound)
1295 {
1296   /* The objects returned when COND has constant operands.  */
1297   static struct iv const_iv;
1298   static tree zero;
1299   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1300   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1301   bool ret = false;
1302
1303   if (gimple_code (stmt) == GIMPLE_COND)
1304     {
1305       op0 = gimple_cond_lhs_ptr (stmt);
1306       op1 = gimple_cond_rhs_ptr (stmt);
1307     }
1308   else
1309     {
1310       op0 = gimple_assign_rhs1_ptr (stmt);
1311       op1 = gimple_assign_rhs2_ptr (stmt);
1312     }
1313
1314   zero = integer_zero_node;
1315   const_iv.step = integer_zero_node;
1316
1317   if (TREE_CODE (*op0) == SSA_NAME)
1318     iv0 = get_iv (data, *op0);
1319   if (TREE_CODE (*op1) == SSA_NAME)
1320     iv1 = get_iv (data, *op1);
1321
1322   /* Exactly one of the compared values must be an iv, and the other one must
1323      be an invariant.  */
1324   if (!iv0 || !iv1)
1325     goto end;
1326
1327   if (integer_zerop (iv0->step))
1328     {
1329       /* Control variable may be on the other side.  */
1330       tmp_op = op0; op0 = op1; op1 = tmp_op;
1331       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1332     }
1333   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1334
1335 end:
1336   if (control_var)
1337     *control_var = op0;;
1338   if (iv_var)
1339     *iv_var = iv0;;
1340   if (bound)
1341     *bound = op1;
1342   if (iv_bound)
1343     *iv_bound = iv1;
1344
1345   return ret;
1346 }
1347
1348 /* Checks whether the condition in STMT is interesting and if so,
1349    records it.  */
1350
1351 static void
1352 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1353 {
1354   tree *var_p, *bound_p;
1355   struct iv *var_iv, *civ;
1356
1357   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1358     {
1359       find_interesting_uses_op (data, *var_p);
1360       find_interesting_uses_op (data, *bound_p);
1361       return;
1362     }
1363
1364   civ = XNEW (struct iv);
1365   *civ = *var_iv;
1366   record_use (data, NULL, civ, stmt, USE_COMPARE);
1367 }
1368
1369 /* Returns true if expression EXPR is obviously invariant in LOOP,
1370    i.e. if all its operands are defined outside of the LOOP.  LOOP
1371    should not be the function body.  */
1372
1373 bool
1374 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1375 {
1376   basic_block def_bb;
1377   unsigned i, len;
1378
1379   gcc_assert (loop_depth (loop) > 0);
1380
1381   if (is_gimple_min_invariant (expr))
1382     return true;
1383
1384   if (TREE_CODE (expr) == SSA_NAME)
1385     {
1386       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1387       if (def_bb
1388           && flow_bb_inside_loop_p (loop, def_bb))
1389         return false;
1390
1391       return true;
1392     }
1393
1394   if (!EXPR_P (expr))
1395     return false;
1396
1397   len = TREE_OPERAND_LENGTH (expr);
1398   for (i = 0; i < len; i++)
1399     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1400       return false;
1401
1402   return true;
1403 }
1404
1405 /* Returns true if statement STMT is obviously invariant in LOOP,
1406    i.e. if all its operands on the RHS are defined outside of the LOOP.
1407    LOOP should not be the function body.  */
1408
1409 bool
1410 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1411 {
1412   unsigned i;
1413   tree lhs;
1414
1415   gcc_assert (loop_depth (loop) > 0);
1416
1417   lhs = gimple_get_lhs (stmt);
1418   for (i = 0; i < gimple_num_ops (stmt); i++)
1419     {
1420       tree op = gimple_op (stmt, i);
1421       if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1422         return false;
1423     }
1424
1425   return true;
1426 }
1427
1428 /* Cumulates the steps of indices into DATA and replaces their values with the
1429    initial ones.  Returns false when the value of the index cannot be determined.
1430    Callback for for_each_index.  */
1431
1432 struct ifs_ivopts_data
1433 {
1434   struct ivopts_data *ivopts_data;
1435   gimple stmt;
1436   tree step;
1437 };
1438
1439 static bool
1440 idx_find_step (tree base, tree *idx, void *data)
1441 {
1442   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1443   struct iv *iv;
1444   tree step, iv_base, iv_step, lbound, off;
1445   struct loop *loop = dta->ivopts_data->current_loop;
1446
1447   /* If base is a component ref, require that the offset of the reference
1448      be invariant.  */
1449   if (TREE_CODE (base) == COMPONENT_REF)
1450     {
1451       off = component_ref_field_offset (base);
1452       return expr_invariant_in_loop_p (loop, off);
1453     }
1454
1455   /* If base is array, first check whether we will be able to move the
1456      reference out of the loop (in order to take its address in strength
1457      reduction).  In order for this to work we need both lower bound
1458      and step to be loop invariants.  */
1459   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1460     {
1461       /* Moreover, for a range, the size needs to be invariant as well.  */
1462       if (TREE_CODE (base) == ARRAY_RANGE_REF
1463           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1464         return false;
1465
1466       step = array_ref_element_size (base);
1467       lbound = array_ref_low_bound (base);
1468
1469       if (!expr_invariant_in_loop_p (loop, step)
1470           || !expr_invariant_in_loop_p (loop, lbound))
1471         return false;
1472     }
1473
1474   if (TREE_CODE (*idx) != SSA_NAME)
1475     return true;
1476
1477   iv = get_iv (dta->ivopts_data, *idx);
1478   if (!iv)
1479     return false;
1480
1481   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1482           *&x[0], which is not folded and does not trigger the
1483           ARRAY_REF path below.  */
1484   *idx = iv->base;
1485
1486   if (integer_zerop (iv->step))
1487     return true;
1488
1489   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1490     {
1491       step = array_ref_element_size (base);
1492
1493       /* We only handle addresses whose step is an integer constant.  */
1494       if (TREE_CODE (step) != INTEGER_CST)
1495         return false;
1496     }
1497   else
1498     /* The step for pointer arithmetics already is 1 byte.  */
1499     step = size_one_node;
1500
1501   iv_base = iv->base;
1502   iv_step = iv->step;
1503   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1504                             sizetype, &iv_base, &iv_step, dta->stmt,
1505                             false))
1506     {
1507       /* The index might wrap.  */
1508       return false;
1509     }
1510
1511   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1512   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1513
1514   return true;
1515 }
1516
1517 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1518    object is passed to it in DATA.  */
1519
1520 static bool
1521 idx_record_use (tree base, tree *idx,
1522                 void *vdata)
1523 {
1524   struct ivopts_data *data = (struct ivopts_data *) vdata;
1525   find_interesting_uses_op (data, *idx);
1526   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1527     {
1528       find_interesting_uses_op (data, array_ref_element_size (base));
1529       find_interesting_uses_op (data, array_ref_low_bound (base));
1530     }
1531   return true;
1532 }
1533
1534 /* If we can prove that TOP = cst * BOT for some constant cst,
1535    store cst to MUL and return true.  Otherwise return false.
1536    The returned value is always sign-extended, regardless of the
1537    signedness of TOP and BOT.  */
1538
1539 static bool
1540 constant_multiple_of (tree top, tree bot, double_int *mul)
1541 {
1542   tree mby;
1543   enum tree_code code;
1544   double_int res, p0, p1;
1545   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1546
1547   STRIP_NOPS (top);
1548   STRIP_NOPS (bot);
1549
1550   if (operand_equal_p (top, bot, 0))
1551     {
1552       *mul = double_int_one;
1553       return true;
1554     }
1555
1556   code = TREE_CODE (top);
1557   switch (code)
1558     {
1559     case MULT_EXPR:
1560       mby = TREE_OPERAND (top, 1);
1561       if (TREE_CODE (mby) != INTEGER_CST)
1562         return false;
1563
1564       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1565         return false;
1566
1567       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1568                               precision);
1569       return true;
1570
1571     case PLUS_EXPR:
1572     case MINUS_EXPR:
1573       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1574           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1575         return false;
1576
1577       if (code == MINUS_EXPR)
1578         p1 = double_int_neg (p1);
1579       *mul = double_int_sext (double_int_add (p0, p1), precision);
1580       return true;
1581
1582     case INTEGER_CST:
1583       if (TREE_CODE (bot) != INTEGER_CST)
1584         return false;
1585
1586       p0 = double_int_sext (tree_to_double_int (top), precision);
1587       p1 = double_int_sext (tree_to_double_int (bot), precision);
1588       if (double_int_zero_p (p1))
1589         return false;
1590       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1591                               precision);
1592       return double_int_zero_p (res);
1593
1594     default:
1595       return false;
1596     }
1597 }
1598
1599 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1600
1601 static bool
1602 may_be_unaligned_p (tree ref, tree step)
1603 {
1604   tree base;
1605   tree base_type;
1606   HOST_WIDE_INT bitsize;
1607   HOST_WIDE_INT bitpos;
1608   tree toffset;
1609   enum machine_mode mode;
1610   int unsignedp, volatilep;
1611   unsigned base_align;
1612
1613   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1614      thus they are not misaligned.  */
1615   if (TREE_CODE (ref) == TARGET_MEM_REF)
1616     return false;
1617
1618   /* The test below is basically copy of what expr.c:normal_inner_ref
1619      does to check whether the object must be loaded by parts when
1620      STRICT_ALIGNMENT is true.  */
1621   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1622                               &unsignedp, &volatilep, true);
1623   base_type = TREE_TYPE (base);
1624   base_align = TYPE_ALIGN (base_type);
1625
1626   if (mode != BLKmode)
1627     {
1628       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1629
1630       if (base_align < mode_align
1631           || (bitpos % mode_align) != 0
1632           || (bitpos % BITS_PER_UNIT) != 0)
1633         return true;
1634
1635       if (toffset
1636           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1637         return true;
1638
1639       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1640         return true;
1641     }
1642
1643   return false;
1644 }
1645
1646 /* Return true if EXPR may be non-addressable.   */
1647
1648 bool
1649 may_be_nonaddressable_p (tree expr)
1650 {
1651   switch (TREE_CODE (expr))
1652     {
1653     case TARGET_MEM_REF:
1654       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1655          target, thus they are always addressable.  */
1656       return false;
1657
1658     case COMPONENT_REF:
1659       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1660              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1661
1662     case VIEW_CONVERT_EXPR:
1663       /* This kind of view-conversions may wrap non-addressable objects
1664          and make them look addressable.  After some processing the
1665          non-addressability may be uncovered again, causing ADDR_EXPRs
1666          of inappropriate objects to be built.  */
1667       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1668           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1669         return true;
1670
1671       /* ... fall through ... */
1672
1673     case ARRAY_REF:
1674     case ARRAY_RANGE_REF:
1675       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1676
1677     CASE_CONVERT:
1678       return true;
1679
1680     default:
1681       break;
1682     }
1683
1684   return false;
1685 }
1686
1687 /* Finds addresses in *OP_P inside STMT.  */
1688
1689 static void
1690 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1691 {
1692   tree base = *op_p, step = size_zero_node;
1693   struct iv *civ;
1694   struct ifs_ivopts_data ifs_ivopts_data;
1695
1696   /* Do not play with volatile memory references.  A bit too conservative,
1697      perhaps, but safe.  */
1698   if (gimple_has_volatile_ops (stmt))
1699     goto fail;
1700
1701   /* Ignore bitfields for now.  Not really something terribly complicated
1702      to handle.  TODO.  */
1703   if (TREE_CODE (base) == BIT_FIELD_REF)
1704     goto fail;
1705
1706   base = unshare_expr (base);
1707
1708   if (TREE_CODE (base) == TARGET_MEM_REF)
1709     {
1710       tree type = build_pointer_type (TREE_TYPE (base));
1711       tree astep;
1712
1713       if (TMR_BASE (base)
1714           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1715         {
1716           civ = get_iv (data, TMR_BASE (base));
1717           if (!civ)
1718             goto fail;
1719
1720           TMR_BASE (base) = civ->base;
1721           step = civ->step;
1722         }
1723       if (TMR_INDEX2 (base)
1724           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1725         {
1726           civ = get_iv (data, TMR_INDEX2 (base));
1727           if (!civ)
1728             goto fail;
1729
1730           TMR_INDEX2 (base) = civ->base;
1731           step = civ->step;
1732         }
1733       if (TMR_INDEX (base)
1734           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1735         {
1736           civ = get_iv (data, TMR_INDEX (base));
1737           if (!civ)
1738             goto fail;
1739
1740           TMR_INDEX (base) = civ->base;
1741           astep = civ->step;
1742
1743           if (astep)
1744             {
1745               if (TMR_STEP (base))
1746                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1747
1748               step = fold_build2 (PLUS_EXPR, type, step, astep);
1749             }
1750         }
1751
1752       if (integer_zerop (step))
1753         goto fail;
1754       base = tree_mem_ref_addr (type, base);
1755     }
1756   else
1757     {
1758       ifs_ivopts_data.ivopts_data = data;
1759       ifs_ivopts_data.stmt = stmt;
1760       ifs_ivopts_data.step = size_zero_node;
1761       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1762           || integer_zerop (ifs_ivopts_data.step))
1763         goto fail;
1764       step = ifs_ivopts_data.step;
1765
1766       /* Check that the base expression is addressable.  This needs
1767          to be done after substituting bases of IVs into it.  */
1768       if (may_be_nonaddressable_p (base))
1769         goto fail;
1770
1771       /* Moreover, on strict alignment platforms, check that it is
1772          sufficiently aligned.  */
1773       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1774         goto fail;
1775
1776       base = build_fold_addr_expr (base);
1777
1778       /* Substituting bases of IVs into the base expression might
1779          have caused folding opportunities.  */
1780       if (TREE_CODE (base) == ADDR_EXPR)
1781         {
1782           tree *ref = &TREE_OPERAND (base, 0);
1783           while (handled_component_p (*ref))
1784             ref = &TREE_OPERAND (*ref, 0);
1785           if (TREE_CODE (*ref) == MEM_REF)
1786             {
1787               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1788                                       TREE_OPERAND (*ref, 0),
1789                                       TREE_OPERAND (*ref, 1));
1790               if (tem)
1791                 *ref = tem;
1792             }
1793         }
1794     }
1795
1796   civ = alloc_iv (base, step);
1797   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1798   return;
1799
1800 fail:
1801   for_each_index (op_p, idx_record_use, data);
1802 }
1803
1804 /* Finds and records invariants used in STMT.  */
1805
1806 static void
1807 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1808 {
1809   ssa_op_iter iter;
1810   use_operand_p use_p;
1811   tree op;
1812
1813   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1814     {
1815       op = USE_FROM_PTR (use_p);
1816       record_invariant (data, op, false);
1817     }
1818 }
1819
1820 /* Finds interesting uses of induction variables in the statement STMT.  */
1821
1822 static void
1823 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1824 {
1825   struct iv *iv;
1826   tree op, *lhs, *rhs;
1827   ssa_op_iter iter;
1828   use_operand_p use_p;
1829   enum tree_code code;
1830
1831   find_invariants_stmt (data, stmt);
1832
1833   if (gimple_code (stmt) == GIMPLE_COND)
1834     {
1835       find_interesting_uses_cond (data, stmt);
1836       return;
1837     }
1838
1839   if (is_gimple_assign (stmt))
1840     {
1841       lhs = gimple_assign_lhs_ptr (stmt);
1842       rhs = gimple_assign_rhs1_ptr (stmt);
1843
1844       if (TREE_CODE (*lhs) == SSA_NAME)
1845         {
1846           /* If the statement defines an induction variable, the uses are not
1847              interesting by themselves.  */
1848
1849           iv = get_iv (data, *lhs);
1850
1851           if (iv && !integer_zerop (iv->step))
1852             return;
1853         }
1854
1855       code = gimple_assign_rhs_code (stmt);
1856       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1857           && (REFERENCE_CLASS_P (*rhs)
1858               || is_gimple_val (*rhs)))
1859         {
1860           if (REFERENCE_CLASS_P (*rhs))
1861             find_interesting_uses_address (data, stmt, rhs);
1862           else
1863             find_interesting_uses_op (data, *rhs);
1864
1865           if (REFERENCE_CLASS_P (*lhs))
1866             find_interesting_uses_address (data, stmt, lhs);
1867           return;
1868         }
1869       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1870         {
1871           find_interesting_uses_cond (data, stmt);
1872           return;
1873         }
1874
1875       /* TODO -- we should also handle address uses of type
1876
1877          memory = call (whatever);
1878
1879          and
1880
1881          call (memory).  */
1882     }
1883
1884   if (gimple_code (stmt) == GIMPLE_PHI
1885       && gimple_bb (stmt) == data->current_loop->header)
1886     {
1887       iv = get_iv (data, PHI_RESULT (stmt));
1888
1889       if (iv && !integer_zerop (iv->step))
1890         return;
1891     }
1892
1893   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1894     {
1895       op = USE_FROM_PTR (use_p);
1896
1897       if (TREE_CODE (op) != SSA_NAME)
1898         continue;
1899
1900       iv = get_iv (data, op);
1901       if (!iv)
1902         continue;
1903
1904       find_interesting_uses_op (data, op);
1905     }
1906 }
1907
1908 /* Finds interesting uses of induction variables outside of loops
1909    on loop exit edge EXIT.  */
1910
1911 static void
1912 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1913 {
1914   gimple phi;
1915   gimple_stmt_iterator psi;
1916   tree def;
1917
1918   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1919     {
1920       phi = gsi_stmt (psi);
1921       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1922       if (is_gimple_reg (def))
1923         find_interesting_uses_op (data, def);
1924     }
1925 }
1926
1927 /* Finds uses of the induction variables that are interesting.  */
1928
1929 static void
1930 find_interesting_uses (struct ivopts_data *data)
1931 {
1932   basic_block bb;
1933   gimple_stmt_iterator bsi;
1934   basic_block *body = get_loop_body (data->current_loop);
1935   unsigned i;
1936   struct version_info *info;
1937   edge e;
1938
1939   if (dump_file && (dump_flags & TDF_DETAILS))
1940     fprintf (dump_file, "Uses:\n\n");
1941
1942   for (i = 0; i < data->current_loop->num_nodes; i++)
1943     {
1944       edge_iterator ei;
1945       bb = body[i];
1946
1947       FOR_EACH_EDGE (e, ei, bb->succs)
1948         if (e->dest != EXIT_BLOCK_PTR
1949             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1950           find_interesting_uses_outside (data, e);
1951
1952       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1953         find_interesting_uses_stmt (data, gsi_stmt (bsi));
1954       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1955         if (!is_gimple_debug (gsi_stmt (bsi)))
1956           find_interesting_uses_stmt (data, gsi_stmt (bsi));
1957     }
1958
1959   if (dump_file && (dump_flags & TDF_DETAILS))
1960     {
1961       bitmap_iterator bi;
1962
1963       fprintf (dump_file, "\n");
1964
1965       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1966         {
1967           info = ver_info (data, i);
1968           if (info->inv_id)
1969             {
1970               fprintf (dump_file, "  ");
1971               print_generic_expr (dump_file, info->name, TDF_SLIM);
1972               fprintf (dump_file, " is invariant (%d)%s\n",
1973                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1974             }
1975         }
1976
1977       fprintf (dump_file, "\n");
1978     }
1979
1980   free (body);
1981 }
1982
1983 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1984    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1985    we are at the top-level of the processed address.  */
1986
1987 static tree
1988 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1989                 unsigned HOST_WIDE_INT *offset)
1990 {
1991   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1992   enum tree_code code;
1993   tree type, orig_type = TREE_TYPE (expr);
1994   unsigned HOST_WIDE_INT off0, off1, st;
1995   tree orig_expr = expr;
1996
1997   STRIP_NOPS (expr);
1998
1999   type = TREE_TYPE (expr);
2000   code = TREE_CODE (expr);
2001   *offset = 0;
2002
2003   switch (code)
2004     {
2005     case INTEGER_CST:
2006       if (!cst_and_fits_in_hwi (expr)
2007           || integer_zerop (expr))
2008         return orig_expr;
2009
2010       *offset = int_cst_value (expr);
2011       return build_int_cst (orig_type, 0);
2012
2013     case POINTER_PLUS_EXPR:
2014     case PLUS_EXPR:
2015     case MINUS_EXPR:
2016       op0 = TREE_OPERAND (expr, 0);
2017       op1 = TREE_OPERAND (expr, 1);
2018
2019       op0 = strip_offset_1 (op0, false, false, &off0);
2020       op1 = strip_offset_1 (op1, false, false, &off1);
2021
2022       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2023       if (op0 == TREE_OPERAND (expr, 0)
2024           && op1 == TREE_OPERAND (expr, 1))
2025         return orig_expr;
2026
2027       if (integer_zerop (op1))
2028         expr = op0;
2029       else if (integer_zerop (op0))
2030         {
2031           if (code == MINUS_EXPR)
2032             expr = fold_build1 (NEGATE_EXPR, type, op1);
2033           else
2034             expr = op1;
2035         }
2036       else
2037         expr = fold_build2 (code, type, op0, op1);
2038
2039       return fold_convert (orig_type, expr);
2040
2041     case MULT_EXPR:
2042       op1 = TREE_OPERAND (expr, 1);
2043       if (!cst_and_fits_in_hwi (op1))
2044         return orig_expr;
2045
2046       op0 = TREE_OPERAND (expr, 0);
2047       op0 = strip_offset_1 (op0, false, false, &off0);
2048       if (op0 == TREE_OPERAND (expr, 0))
2049         return orig_expr;
2050
2051       *offset = off0 * int_cst_value (op1);
2052       if (integer_zerop (op0))
2053         expr = op0;
2054       else
2055         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2056
2057       return fold_convert (orig_type, expr);
2058
2059     case ARRAY_REF:
2060     case ARRAY_RANGE_REF:
2061       if (!inside_addr)
2062         return orig_expr;
2063
2064       step = array_ref_element_size (expr);
2065       if (!cst_and_fits_in_hwi (step))
2066         break;
2067
2068       st = int_cst_value (step);
2069       op1 = TREE_OPERAND (expr, 1);
2070       op1 = strip_offset_1 (op1, false, false, &off1);
2071       *offset = off1 * st;
2072
2073       if (top_compref
2074           && integer_zerop (op1))
2075         {
2076           /* Strip the component reference completely.  */
2077           op0 = TREE_OPERAND (expr, 0);
2078           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2079           *offset += off0;
2080           return op0;
2081         }
2082       break;
2083
2084     case COMPONENT_REF:
2085       if (!inside_addr)
2086         return orig_expr;
2087
2088       tmp = component_ref_field_offset (expr);
2089       if (top_compref
2090           && cst_and_fits_in_hwi (tmp))
2091         {
2092           /* Strip the component reference completely.  */
2093           op0 = TREE_OPERAND (expr, 0);
2094           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2095           *offset = off0 + int_cst_value (tmp);
2096           return op0;
2097         }
2098       break;
2099
2100     case ADDR_EXPR:
2101       op0 = TREE_OPERAND (expr, 0);
2102       op0 = strip_offset_1 (op0, true, true, &off0);
2103       *offset += off0;
2104
2105       if (op0 == TREE_OPERAND (expr, 0))
2106         return orig_expr;
2107
2108       expr = build_fold_addr_expr (op0);
2109       return fold_convert (orig_type, expr);
2110
2111     case MEM_REF:
2112       /* ???  Offset operand?  */
2113       inside_addr = false;
2114       break;
2115
2116     default:
2117       return orig_expr;
2118     }
2119
2120   /* Default handling of expressions for that we want to recurse into
2121      the first operand.  */
2122   op0 = TREE_OPERAND (expr, 0);
2123   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2124   *offset += off0;
2125
2126   if (op0 == TREE_OPERAND (expr, 0)
2127       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2128     return orig_expr;
2129
2130   expr = copy_node (expr);
2131   TREE_OPERAND (expr, 0) = op0;
2132   if (op1)
2133     TREE_OPERAND (expr, 1) = op1;
2134
2135   /* Inside address, we might strip the top level component references,
2136      thus changing type of the expression.  Handling of ADDR_EXPR
2137      will fix that.  */
2138   expr = fold_convert (orig_type, expr);
2139
2140   return expr;
2141 }
2142
2143 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2144
2145 static tree
2146 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2147 {
2148   return strip_offset_1 (expr, false, false, offset);
2149 }
2150
2151 /* Returns variant of TYPE that can be used as base for different uses.
2152    We return unsigned type with the same precision, which avoids problems
2153    with overflows.  */
2154
2155 static tree
2156 generic_type_for (tree type)
2157 {
2158   if (POINTER_TYPE_P (type))
2159     return unsigned_type_for (type);
2160
2161   if (TYPE_UNSIGNED (type))
2162     return type;
2163
2164   return unsigned_type_for (type);
2165 }
2166
2167 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2168    the bitmap to that we should store it.  */
2169
2170 static struct ivopts_data *fd_ivopts_data;
2171 static tree
2172 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2173 {
2174   bitmap *depends_on = (bitmap *) data;
2175   struct version_info *info;
2176
2177   if (TREE_CODE (*expr_p) != SSA_NAME)
2178     return NULL_TREE;
2179   info = name_info (fd_ivopts_data, *expr_p);
2180
2181   if (!info->inv_id || info->has_nonlin_use)
2182     return NULL_TREE;
2183
2184   if (!*depends_on)
2185     *depends_on = BITMAP_ALLOC (NULL);
2186   bitmap_set_bit (*depends_on, info->inv_id);
2187
2188   return NULL_TREE;
2189 }
2190
2191 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2192    position to POS.  If USE is not NULL, the candidate is set as related to
2193    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2194    replacement of the final value of the iv by a direct computation.  */
2195
2196 static struct iv_cand *
2197 add_candidate_1 (struct ivopts_data *data,
2198                  tree base, tree step, bool important, enum iv_position pos,
2199                  struct iv_use *use, gimple incremented_at)
2200 {
2201   unsigned i;
2202   struct iv_cand *cand = NULL;
2203   tree type, orig_type;
2204
2205   if (base)
2206     {
2207       orig_type = TREE_TYPE (base);
2208       type = generic_type_for (orig_type);
2209       if (type != orig_type)
2210         {
2211           base = fold_convert (type, base);
2212           step = fold_convert (type, step);
2213         }
2214     }
2215
2216   for (i = 0; i < n_iv_cands (data); i++)
2217     {
2218       cand = iv_cand (data, i);
2219
2220       if (cand->pos != pos)
2221         continue;
2222
2223       if (cand->incremented_at != incremented_at
2224           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2225               && cand->ainc_use != use))
2226         continue;
2227
2228       if (!cand->iv)
2229         {
2230           if (!base && !step)
2231             break;
2232
2233           continue;
2234         }
2235
2236       if (!base && !step)
2237         continue;
2238
2239       if (operand_equal_p (base, cand->iv->base, 0)
2240           && operand_equal_p (step, cand->iv->step, 0)
2241           && (TYPE_PRECISION (TREE_TYPE (base))
2242               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2243         break;
2244     }
2245
2246   if (i == n_iv_cands (data))
2247     {
2248       cand = XCNEW (struct iv_cand);
2249       cand->id = i;
2250
2251       if (!base && !step)
2252         cand->iv = NULL;
2253       else
2254         cand->iv = alloc_iv (base, step);
2255
2256       cand->pos = pos;
2257       if (pos != IP_ORIGINAL && cand->iv)
2258         {
2259           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2260           cand->var_after = cand->var_before;
2261         }
2262       cand->important = important;
2263       cand->incremented_at = incremented_at;
2264       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2265
2266       if (step
2267           && TREE_CODE (step) != INTEGER_CST)
2268         {
2269           fd_ivopts_data = data;
2270           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2271         }
2272
2273       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2274         cand->ainc_use = use;
2275       else
2276         cand->ainc_use = NULL;
2277
2278       if (dump_file && (dump_flags & TDF_DETAILS))
2279         dump_cand (dump_file, cand);
2280     }
2281
2282   if (important && !cand->important)
2283     {
2284       cand->important = true;
2285       if (dump_file && (dump_flags & TDF_DETAILS))
2286         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2287     }
2288
2289   if (use)
2290     {
2291       bitmap_set_bit (use->related_cands, i);
2292       if (dump_file && (dump_flags & TDF_DETAILS))
2293         fprintf (dump_file, "Candidate %d is related to use %d\n",
2294                  cand->id, use->id);
2295     }
2296
2297   return cand;
2298 }
2299
2300 /* Returns true if incrementing the induction variable at the end of the LOOP
2301    is allowed.
2302
2303    The purpose is to avoid splitting latch edge with a biv increment, thus
2304    creating a jump, possibly confusing other optimization passes and leaving
2305    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2306    is not available (so we do not have a better alternative), or if the latch
2307    edge is already nonempty.  */
2308
2309 static bool
2310 allow_ip_end_pos_p (struct loop *loop)
2311 {
2312   if (!ip_normal_pos (loop))
2313     return true;
2314
2315   if (!empty_block_p (ip_end_pos (loop)))
2316     return true;
2317
2318   return false;
2319 }
2320
2321 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2322    Important field is set to IMPORTANT.  */
2323
2324 static void
2325 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2326                         bool important, struct iv_use *use)
2327 {
2328   basic_block use_bb = gimple_bb (use->stmt);
2329   enum machine_mode mem_mode;
2330   unsigned HOST_WIDE_INT cstepi;
2331
2332   /* If we insert the increment in any position other than the standard
2333      ones, we must ensure that it is incremented once per iteration.
2334      It must not be in an inner nested loop, or one side of an if
2335      statement.  */
2336   if (use_bb->loop_father != data->current_loop
2337       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2338       || stmt_could_throw_p (use->stmt)
2339       || !cst_and_fits_in_hwi (step))
2340     return;
2341
2342   cstepi = int_cst_value (step);
2343
2344   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2345   if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2346       || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2347     {
2348       enum tree_code code = MINUS_EXPR;
2349       tree new_base;
2350       tree new_step = step;
2351
2352       if (POINTER_TYPE_P (TREE_TYPE (base)))
2353         {
2354           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2355           code = POINTER_PLUS_EXPR;
2356         }
2357       else
2358         new_step = fold_convert (TREE_TYPE (base), new_step);
2359       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2360       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2361                        use->stmt);
2362     }
2363   if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2364       || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2365     {
2366       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2367                        use->stmt);
2368     }
2369 }
2370
2371 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2372    position to POS.  If USE is not NULL, the candidate is set as related to
2373    it.  The candidate computation is scheduled on all available positions.  */
2374
2375 static void
2376 add_candidate (struct ivopts_data *data,
2377                tree base, tree step, bool important, struct iv_use *use)
2378 {
2379   if (ip_normal_pos (data->current_loop))
2380     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2381   if (ip_end_pos (data->current_loop)
2382       && allow_ip_end_pos_p (data->current_loop))
2383     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2384
2385   if (use != NULL && use->type == USE_ADDRESS)
2386     add_autoinc_candidates (data, base, step, important, use);
2387 }
2388
2389 /* Add a standard "0 + 1 * iteration" iv candidate for a
2390    type with SIZE bits.  */
2391
2392 static void
2393 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2394                                      unsigned int size)
2395 {
2396   tree type = lang_hooks.types.type_for_size (size, true);
2397   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2398                  true, NULL);
2399 }
2400
2401 /* Adds standard iv candidates.  */
2402
2403 static void
2404 add_standard_iv_candidates (struct ivopts_data *data)
2405 {
2406   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2407
2408   /* The same for a double-integer type if it is still fast enough.  */
2409   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2410     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2411 }
2412
2413
2414 /* Adds candidates bases on the old induction variable IV.  */
2415
2416 static void
2417 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2418 {
2419   gimple phi;
2420   tree def;
2421   struct iv_cand *cand;
2422
2423   add_candidate (data, iv->base, iv->step, true, NULL);
2424
2425   /* The same, but with initial value zero.  */
2426   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2427     add_candidate (data, size_int (0), iv->step, true, NULL);
2428   else
2429     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2430                    iv->step, true, NULL);
2431
2432   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2433   if (gimple_code (phi) == GIMPLE_PHI)
2434     {
2435       /* Additionally record the possibility of leaving the original iv
2436          untouched.  */
2437       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2438       cand = add_candidate_1 (data,
2439                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2440                               SSA_NAME_DEF_STMT (def));
2441       cand->var_before = iv->ssa_name;
2442       cand->var_after = def;
2443     }
2444 }
2445
2446 /* Adds candidates based on the old induction variables.  */
2447
2448 static void
2449 add_old_ivs_candidates (struct ivopts_data *data)
2450 {
2451   unsigned i;
2452   struct iv *iv;
2453   bitmap_iterator bi;
2454
2455   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2456     {
2457       iv = ver_info (data, i)->iv;
2458       if (iv && iv->biv_p && !integer_zerop (iv->step))
2459         add_old_iv_candidates (data, iv);
2460     }
2461 }
2462
2463 /* Adds candidates based on the value of the induction variable IV and USE.  */
2464
2465 static void
2466 add_iv_value_candidates (struct ivopts_data *data,
2467                          struct iv *iv, struct iv_use *use)
2468 {
2469   unsigned HOST_WIDE_INT offset;
2470   tree base;
2471   tree basetype;
2472
2473   add_candidate (data, iv->base, iv->step, false, use);
2474
2475   /* The same, but with initial value zero.  Make such variable important,
2476      since it is generic enough so that possibly many uses may be based
2477      on it.  */
2478   basetype = TREE_TYPE (iv->base);
2479   if (POINTER_TYPE_P (basetype))
2480     basetype = sizetype;
2481   add_candidate (data, build_int_cst (basetype, 0),
2482                  iv->step, true, use);
2483
2484   /* Third, try removing the constant offset.  Make sure to even
2485      add a candidate for &a[0] vs. (T *)&a.  */
2486   base = strip_offset (iv->base, &offset);
2487   if (offset
2488       || base != iv->base)
2489     add_candidate (data, base, iv->step, false, use);
2490 }
2491
2492 /* Adds candidates based on the uses.  */
2493
2494 static void
2495 add_derived_ivs_candidates (struct ivopts_data *data)
2496 {
2497   unsigned i;
2498
2499   for (i = 0; i < n_iv_uses (data); i++)
2500     {
2501       struct iv_use *use = iv_use (data, i);
2502
2503       if (!use)
2504         continue;
2505
2506       switch (use->type)
2507         {
2508         case USE_NONLINEAR_EXPR:
2509         case USE_COMPARE:
2510         case USE_ADDRESS:
2511           /* Just add the ivs based on the value of the iv used here.  */
2512           add_iv_value_candidates (data, use->iv, use);
2513           break;
2514
2515         default:
2516           gcc_unreachable ();
2517         }
2518     }
2519 }
2520
2521 /* Record important candidates and add them to related_cands bitmaps
2522    if needed.  */
2523
2524 static void
2525 record_important_candidates (struct ivopts_data *data)
2526 {
2527   unsigned i;
2528   struct iv_use *use;
2529
2530   for (i = 0; i < n_iv_cands (data); i++)
2531     {
2532       struct iv_cand *cand = iv_cand (data, i);
2533
2534       if (cand->important)
2535         bitmap_set_bit (data->important_candidates, i);
2536     }
2537
2538   data->consider_all_candidates = (n_iv_cands (data)
2539                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2540
2541   if (data->consider_all_candidates)
2542     {
2543       /* We will not need "related_cands" bitmaps in this case,
2544          so release them to decrease peak memory consumption.  */
2545       for (i = 0; i < n_iv_uses (data); i++)
2546         {
2547           use = iv_use (data, i);
2548           BITMAP_FREE (use->related_cands);
2549         }
2550     }
2551   else
2552     {
2553       /* Add important candidates to the related_cands bitmaps.  */
2554       for (i = 0; i < n_iv_uses (data); i++)
2555         bitmap_ior_into (iv_use (data, i)->related_cands,
2556                          data->important_candidates);
2557     }
2558 }
2559
2560 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2561    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2562    we allocate a simple list to every use.  */
2563
2564 static void
2565 alloc_use_cost_map (struct ivopts_data *data)
2566 {
2567   unsigned i, size, s, j;
2568
2569   for (i = 0; i < n_iv_uses (data); i++)
2570     {
2571       struct iv_use *use = iv_use (data, i);
2572       bitmap_iterator bi;
2573
2574       if (data->consider_all_candidates)
2575         size = n_iv_cands (data);
2576       else
2577         {
2578           s = 0;
2579           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2580             {
2581               s++;
2582             }
2583
2584           /* Round up to the power of two, so that moduling by it is fast.  */
2585           for (size = 1; size < s; size <<= 1)
2586             continue;
2587         }
2588
2589       use->n_map_members = size;
2590       use->cost_map = XCNEWVEC (struct cost_pair, size);
2591     }
2592 }
2593
2594 /* Returns description of computation cost of expression whose runtime
2595    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2596
2597 static comp_cost
2598 new_cost (unsigned runtime, unsigned complexity)
2599 {
2600   comp_cost cost;
2601
2602   cost.cost = runtime;
2603   cost.complexity = complexity;
2604
2605   return cost;
2606 }
2607
2608 /* Adds costs COST1 and COST2.  */
2609
2610 static comp_cost
2611 add_costs (comp_cost cost1, comp_cost cost2)
2612 {
2613   cost1.cost += cost2.cost;
2614   cost1.complexity += cost2.complexity;
2615
2616   return cost1;
2617 }
2618 /* Subtracts costs COST1 and COST2.  */
2619
2620 static comp_cost
2621 sub_costs (comp_cost cost1, comp_cost cost2)
2622 {
2623   cost1.cost -= cost2.cost;
2624   cost1.complexity -= cost2.complexity;
2625
2626   return cost1;
2627 }
2628
2629 /* Returns a negative number if COST1 < COST2, a positive number if
2630    COST1 > COST2, and 0 if COST1 = COST2.  */
2631
2632 static int
2633 compare_costs (comp_cost cost1, comp_cost cost2)
2634 {
2635   if (cost1.cost == cost2.cost)
2636     return cost1.complexity - cost2.complexity;
2637
2638   return cost1.cost - cost2.cost;
2639 }
2640
2641 /* Returns true if COST is infinite.  */
2642
2643 static bool
2644 infinite_cost_p (comp_cost cost)
2645 {
2646   return cost.cost == INFTY;
2647 }
2648
2649 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2650    on invariants DEPENDS_ON and that the value used in expressing it
2651    is VALUE.  */
2652
2653 static void
2654 set_use_iv_cost (struct ivopts_data *data,
2655                  struct iv_use *use, struct iv_cand *cand,
2656                  comp_cost cost, bitmap depends_on, tree value,
2657                  int inv_expr_id)
2658 {
2659   unsigned i, s;
2660
2661   if (infinite_cost_p (cost))
2662     {
2663       BITMAP_FREE (depends_on);
2664       return;
2665     }
2666
2667   if (data->consider_all_candidates)
2668     {
2669       use->cost_map[cand->id].cand = cand;
2670       use->cost_map[cand->id].cost = cost;
2671       use->cost_map[cand->id].depends_on = depends_on;
2672       use->cost_map[cand->id].value = value;
2673       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2674       return;
2675     }
2676
2677   /* n_map_members is a power of two, so this computes modulo.  */
2678   s = cand->id & (use->n_map_members - 1);
2679   for (i = s; i < use->n_map_members; i++)
2680     if (!use->cost_map[i].cand)
2681       goto found;
2682   for (i = 0; i < s; i++)
2683     if (!use->cost_map[i].cand)
2684       goto found;
2685
2686   gcc_unreachable ();
2687
2688 found:
2689   use->cost_map[i].cand = cand;
2690   use->cost_map[i].cost = cost;
2691   use->cost_map[i].depends_on = depends_on;
2692   use->cost_map[i].value = value;
2693   use->cost_map[i].inv_expr_id = inv_expr_id;
2694 }
2695
2696 /* Gets cost of (USE, CANDIDATE) pair.  */
2697
2698 static struct cost_pair *
2699 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2700                  struct iv_cand *cand)
2701 {
2702   unsigned i, s;
2703   struct cost_pair *ret;
2704
2705   if (!cand)
2706     return NULL;
2707
2708   if (data->consider_all_candidates)
2709     {
2710       ret = use->cost_map + cand->id;
2711       if (!ret->cand)
2712         return NULL;
2713
2714       return ret;
2715     }
2716
2717   /* n_map_members is a power of two, so this computes modulo.  */
2718   s = cand->id & (use->n_map_members - 1);
2719   for (i = s; i < use->n_map_members; i++)
2720     if (use->cost_map[i].cand == cand)
2721       return use->cost_map + i;
2722
2723   for (i = 0; i < s; i++)
2724     if (use->cost_map[i].cand == cand)
2725       return use->cost_map + i;
2726
2727   return NULL;
2728 }
2729
2730 /* Returns estimate on cost of computing SEQ.  */
2731
2732 static unsigned
2733 seq_cost (rtx seq, bool speed)
2734 {
2735   unsigned cost = 0;
2736   rtx set;
2737
2738   for (; seq; seq = NEXT_INSN (seq))
2739     {
2740       set = single_set (seq);
2741       if (set)
2742         cost += rtx_cost (set, SET,speed);
2743       else
2744         cost++;
2745     }
2746
2747   return cost;
2748 }
2749
2750 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2751 static rtx
2752 produce_memory_decl_rtl (tree obj, int *regno)
2753 {
2754   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2755   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2756   rtx x;
2757
2758   gcc_assert (obj);
2759   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2760     {
2761       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2762       x = gen_rtx_SYMBOL_REF (address_mode, name);
2763       SET_SYMBOL_REF_DECL (x, obj);
2764       x = gen_rtx_MEM (DECL_MODE (obj), x);
2765       set_mem_addr_space (x, as);
2766       targetm.encode_section_info (obj, x, true);
2767     }
2768   else
2769     {
2770       x = gen_raw_REG (address_mode, (*regno)++);
2771       x = gen_rtx_MEM (DECL_MODE (obj), x);
2772       set_mem_addr_space (x, as);
2773     }
2774
2775   return x;
2776 }
2777
2778 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2779    walk_tree.  DATA contains the actual fake register number.  */
2780
2781 static tree
2782 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2783 {
2784   tree obj = NULL_TREE;
2785   rtx x = NULL_RTX;
2786   int *regno = (int *) data;
2787
2788   switch (TREE_CODE (*expr_p))
2789     {
2790     case ADDR_EXPR:
2791       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2792            handled_component_p (*expr_p);
2793            expr_p = &TREE_OPERAND (*expr_p, 0))
2794         continue;
2795       obj = *expr_p;
2796       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2797         x = produce_memory_decl_rtl (obj, regno);
2798       break;
2799
2800     case SSA_NAME:
2801       *ws = 0;
2802       obj = SSA_NAME_VAR (*expr_p);
2803       if (!DECL_RTL_SET_P (obj))
2804         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2805       break;
2806
2807     case VAR_DECL:
2808     case PARM_DECL:
2809     case RESULT_DECL:
2810       *ws = 0;
2811       obj = *expr_p;
2812
2813       if (DECL_RTL_SET_P (obj))
2814         break;
2815
2816       if (DECL_MODE (obj) == BLKmode)
2817         x = produce_memory_decl_rtl (obj, regno);
2818       else
2819         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2820
2821       break;
2822
2823     default:
2824       break;
2825     }
2826
2827   if (x)
2828     {
2829       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2830       SET_DECL_RTL (obj, x);
2831     }
2832
2833   return NULL_TREE;
2834 }
2835
2836 /* Determines cost of the computation of EXPR.  */
2837
2838 static unsigned
2839 computation_cost (tree expr, bool speed)
2840 {
2841   rtx seq, rslt;
2842   tree type = TREE_TYPE (expr);
2843   unsigned cost;
2844   /* Avoid using hard regs in ways which may be unsupported.  */
2845   int regno = LAST_VIRTUAL_REGISTER + 1;
2846   struct cgraph_node *node = cgraph_node (current_function_decl);
2847   enum node_frequency real_frequency = node->frequency;
2848
2849   node->frequency = NODE_FREQUENCY_NORMAL;
2850   crtl->maybe_hot_insn_p = speed;
2851   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2852   start_sequence ();
2853   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2854   seq = get_insns ();
2855   end_sequence ();
2856   default_rtl_profile ();
2857   node->frequency = real_frequency;
2858
2859   cost = seq_cost (seq, speed);
2860   if (MEM_P (rslt))
2861     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2862                           TYPE_ADDR_SPACE (type), speed);
2863
2864   return cost;
2865 }
2866
2867 /* Returns variable containing the value of candidate CAND at statement AT.  */
2868
2869 static tree
2870 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2871 {
2872   if (stmt_after_increment (loop, cand, stmt))
2873     return cand->var_after;
2874   else
2875     return cand->var_before;
2876 }
2877
2878 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2879    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2880
2881 int
2882 tree_int_cst_sign_bit (const_tree t)
2883 {
2884   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2885   unsigned HOST_WIDE_INT w;
2886
2887   if (bitno < HOST_BITS_PER_WIDE_INT)
2888     w = TREE_INT_CST_LOW (t);
2889   else
2890     {
2891       w = TREE_INT_CST_HIGH (t);
2892       bitno -= HOST_BITS_PER_WIDE_INT;
2893     }
2894
2895   return (w >> bitno) & 1;
2896 }
2897
2898 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2899    same precision that is at least as wide as the precision of TYPE, stores
2900    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2901    type of A and B.  */
2902
2903 static tree
2904 determine_common_wider_type (tree *a, tree *b)
2905 {
2906   tree wider_type = NULL;
2907   tree suba, subb;
2908   tree atype = TREE_TYPE (*a);
2909
2910   if (CONVERT_EXPR_P (*a))
2911     {
2912       suba = TREE_OPERAND (*a, 0);
2913       wider_type = TREE_TYPE (suba);
2914       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2915         return atype;
2916     }
2917   else
2918     return atype;
2919
2920   if (CONVERT_EXPR_P (*b))
2921     {
2922       subb = TREE_OPERAND (*b, 0);
2923       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2924         return atype;
2925     }
2926   else
2927     return atype;
2928
2929   *a = suba;
2930   *b = subb;
2931   return wider_type;
2932 }
2933
2934 /* Determines the expression by that USE is expressed from induction variable
2935    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2936    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2937
2938 static bool
2939 get_computation_aff (struct loop *loop,
2940                      struct iv_use *use, struct iv_cand *cand, gimple at,
2941                      struct affine_tree_combination *aff)
2942 {
2943   tree ubase = use->iv->base;
2944   tree ustep = use->iv->step;
2945   tree cbase = cand->iv->base;
2946   tree cstep = cand->iv->step, cstep_common;
2947   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2948   tree common_type, var;
2949   tree uutype;
2950   aff_tree cbase_aff, var_aff;
2951   double_int rat;
2952
2953   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2954     {
2955       /* We do not have a precision to express the values of use.  */
2956       return false;
2957     }
2958
2959   var = var_at_stmt (loop, cand, at);
2960   uutype = unsigned_type_for (utype);
2961
2962   /* If the conversion is not noop, perform it.  */
2963   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2964     {
2965       cstep = fold_convert (uutype, cstep);
2966       cbase = fold_convert (uutype, cbase);
2967       var = fold_convert (uutype, var);
2968     }
2969
2970   if (!constant_multiple_of (ustep, cstep, &rat))
2971     return false;
2972
2973   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2974      type, we achieve better folding by computing their difference in this
2975      wider type, and cast the result to UUTYPE.  We do not need to worry about
2976      overflows, as all the arithmetics will in the end be performed in UUTYPE
2977      anyway.  */
2978   common_type = determine_common_wider_type (&ubase, &cbase);
2979
2980   /* use = ubase - ratio * cbase + ratio * var.  */
2981   tree_to_aff_combination (ubase, common_type, aff);
2982   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2983   tree_to_aff_combination (var, uutype, &var_aff);
2984
2985   /* We need to shift the value if we are after the increment.  */
2986   if (stmt_after_increment (loop, cand, at))
2987     {
2988       aff_tree cstep_aff;
2989
2990       if (common_type != uutype)
2991         cstep_common = fold_convert (common_type, cstep);
2992       else
2993         cstep_common = cstep;
2994
2995       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2996       aff_combination_add (&cbase_aff, &cstep_aff);
2997     }
2998
2999   aff_combination_scale (&cbase_aff, double_int_neg (rat));
3000   aff_combination_add (aff, &cbase_aff);
3001   if (common_type != uutype)
3002     aff_combination_convert (aff, uutype);
3003
3004   aff_combination_scale (&var_aff, rat);
3005   aff_combination_add (aff, &var_aff);
3006
3007   return true;
3008 }
3009
3010 /* Determines the expression by that USE is expressed from induction variable
3011    CAND at statement AT in LOOP.  The computation is unshared.  */
3012
3013 static tree
3014 get_computation_at (struct loop *loop,
3015                     struct iv_use *use, struct iv_cand *cand, gimple at)
3016 {
3017   aff_tree aff;
3018   tree type = TREE_TYPE (use->iv->base);
3019
3020   if (!get_computation_aff (loop, use, cand, at, &aff))
3021     return NULL_TREE;
3022   unshare_aff_combination (&aff);
3023   return fold_convert (type, aff_combination_to_tree (&aff));
3024 }
3025
3026 /* Determines the expression by that USE is expressed from induction variable
3027    CAND in LOOP.  The computation is unshared.  */
3028
3029 static tree
3030 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3031 {
3032   return get_computation_at (loop, use, cand, use->stmt);
3033 }
3034
3035 /* Adjust the cost COST for being in loop setup rather than loop body.
3036    If we're optimizing for space, the loop setup overhead is constant;
3037    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3038 static unsigned
3039 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3040 {
3041   if (cost == INFTY)
3042     return cost;
3043   else if (optimize_loop_for_speed_p (data->current_loop))
3044     return cost / avg_loop_niter (data->current_loop);
3045   else
3046     return cost;
3047 }
3048
3049 /* Returns cost of addition in MODE.  */
3050
3051 static unsigned
3052 add_cost (enum machine_mode mode, bool speed)
3053 {
3054   static unsigned costs[NUM_MACHINE_MODES];
3055   rtx seq;
3056   unsigned cost;
3057
3058   if (costs[mode])
3059     return costs[mode];
3060
3061   start_sequence ();
3062   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3063                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3064                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3065                  NULL_RTX);
3066   seq = get_insns ();
3067   end_sequence ();
3068
3069   cost = seq_cost (seq, speed);
3070   if (!cost)
3071     cost = 1;
3072
3073   costs[mode] = cost;
3074
3075   if (dump_file && (dump_flags & TDF_DETAILS))
3076     fprintf (dump_file, "Addition in %s costs %d\n",
3077              GET_MODE_NAME (mode), cost);
3078   return cost;
3079 }
3080
3081 /* Entry in a hashtable of already known costs for multiplication.  */
3082 struct mbc_entry
3083 {
3084   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3085   enum machine_mode mode;       /* In mode.  */
3086   unsigned cost;                /* The cost.  */
3087 };
3088
3089 /* Counts hash value for the ENTRY.  */
3090
3091 static hashval_t
3092 mbc_entry_hash (const void *entry)
3093 {
3094   const struct mbc_entry *e = (const struct mbc_entry *) entry;
3095
3096   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3097 }
3098
3099 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3100
3101 static int
3102 mbc_entry_eq (const void *entry1, const void *entry2)
3103 {
3104   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3105   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3106
3107   return (e1->mode == e2->mode
3108           && e1->cst == e2->cst);
3109 }
3110
3111 /* Returns cost of multiplication by constant CST in MODE.  */
3112
3113 unsigned
3114 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3115 {
3116   static htab_t costs;
3117   struct mbc_entry **cached, act;
3118   rtx seq;
3119   unsigned cost;
3120
3121   if (!costs)
3122     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3123
3124   act.mode = mode;
3125   act.cst = cst;
3126   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3127   if (*cached)
3128     return (*cached)->cost;
3129
3130   *cached = XNEW (struct mbc_entry);
3131   (*cached)->mode = mode;
3132   (*cached)->cst = cst;
3133
3134   start_sequence ();
3135   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3136                gen_int_mode (cst, mode), NULL_RTX, 0);
3137   seq = get_insns ();
3138   end_sequence ();
3139
3140   cost = seq_cost (seq, speed);
3141
3142   if (dump_file && (dump_flags & TDF_DETAILS))
3143     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3144              (int) cst, GET_MODE_NAME (mode), cost);
3145
3146   (*cached)->cost = cost;
3147
3148   return cost;
3149 }
3150
3151 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3152    validity for a memory reference accessing memory of mode MODE in
3153    address space AS.  */
3154
3155 DEF_VEC_P (sbitmap);
3156 DEF_VEC_ALLOC_P (sbitmap, heap);
3157
3158 bool
3159 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3160                                  addr_space_t as)
3161 {
3162 #define MAX_RATIO 128
3163   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3164   static VEC (sbitmap, heap) *valid_mult_list;
3165   sbitmap valid_mult;
3166
3167   if (data_index >= VEC_length (sbitmap, valid_mult_list))
3168     VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3169
3170   valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3171   if (!valid_mult)
3172     {
3173       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3174       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3175       rtx addr;
3176       HOST_WIDE_INT i;
3177
3178       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3179       sbitmap_zero (valid_mult);
3180       addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3181       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3182         {
3183           XEXP (addr, 1) = gen_int_mode (i, address_mode);
3184           if (memory_address_addr_space_p (mode, addr, as))
3185             SET_BIT (valid_mult, i + MAX_RATIO);
3186         }
3187
3188       if (dump_file && (dump_flags & TDF_DETAILS))
3189         {
3190           fprintf (dump_file, "  allowed multipliers:");
3191           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3192             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3193               fprintf (dump_file, " %d", (int) i);
3194           fprintf (dump_file, "\n");
3195           fprintf (dump_file, "\n");
3196         }
3197
3198       VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3199     }
3200
3201   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3202     return false;
3203
3204   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3205 }
3206
3207 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3208    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3209    variable is omitted.  Compute the cost for a memory reference that accesses
3210    a memory location of mode MEM_MODE in address space AS.
3211
3212    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3213    size of MEM_MODE / RATIO) is available.  To make this determination, we
3214    look at the size of the increment to be made, which is given in CSTEP.
3215    CSTEP may be zero if the step is unknown.
3216    STMT_AFTER_INC is true iff the statement we're looking at is after the
3217    increment of the original biv.
3218
3219    TODO -- there must be some better way.  This all is quite crude.  */
3220
3221 typedef struct
3222 {
3223   HOST_WIDE_INT min_offset, max_offset;
3224   unsigned costs[2][2][2][2];
3225 } *address_cost_data;
3226
3227 DEF_VEC_P (address_cost_data);
3228 DEF_VEC_ALLOC_P (address_cost_data, heap);
3229
3230 static comp_cost
3231 get_address_cost (bool symbol_present, bool var_present,
3232                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3233                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3234                   addr_space_t as, bool speed,
3235                   bool stmt_after_inc, bool *may_autoinc)
3236 {
3237   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3238   static VEC(address_cost_data, heap) *address_cost_data_list;
3239   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3240   address_cost_data data;
3241   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3242   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3243   unsigned cost, acost, complexity;
3244   bool offset_p, ratio_p, autoinc;
3245   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3246   unsigned HOST_WIDE_INT mask;
3247   unsigned bits;
3248
3249   if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3250     VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3251                            data_index + 1);
3252
3253   data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3254   if (!data)
3255     {
3256       HOST_WIDE_INT i;
3257       HOST_WIDE_INT rat, off = 0;
3258       int old_cse_not_expected, width;
3259       unsigned sym_p, var_p, off_p, rat_p, add_c;
3260       rtx seq, addr, base;
3261       rtx reg0, reg1;
3262
3263       data = (address_cost_data) xcalloc (1, sizeof (*data));
3264
3265       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3266
3267       width = GET_MODE_BITSIZE (address_mode) - 1;
3268       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3269         width = HOST_BITS_PER_WIDE_INT - 1;
3270       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3271
3272       for (i = width; i >= 0; i--)
3273         {
3274           off = -((HOST_WIDE_INT) 1 << i);
3275           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3276           if (memory_address_addr_space_p (mem_mode, addr, as))
3277             break;
3278         }
3279       data->min_offset = (i == -1? 0 : off);
3280
3281       for (i = width; i >= 0; i--)
3282         {
3283           off = ((HOST_WIDE_INT) 1 << i) - 1;
3284           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3285           if (memory_address_addr_space_p (mem_mode, addr, as))
3286             break;
3287         }
3288       if (i == -1)
3289         off = 0;
3290       data->max_offset = off;
3291
3292       if (dump_file && (dump_flags & TDF_DETAILS))
3293         {
3294           fprintf (dump_file, "get_address_cost:\n");
3295           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3296                    GET_MODE_NAME (mem_mode),
3297                    data->min_offset);
3298           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3299                    GET_MODE_NAME (mem_mode),
3300                    data->max_offset);
3301         }
3302
3303       rat = 1;
3304       for (i = 2; i <= MAX_RATIO; i++)
3305         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3306           {
3307             rat = i;
3308             break;
3309           }
3310
3311       /* Compute the cost of various addressing modes.  */
3312       acost = 0;
3313       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3314       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3315
3316       if (HAVE_PRE_DECREMENT)
3317         {
3318           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3319           has_predec[mem_mode]
3320             = memory_address_addr_space_p (mem_mode, addr, as);
3321         }
3322       if (HAVE_POST_DECREMENT)
3323         {
3324           addr = gen_rtx_POST_DEC (address_mode, reg0);
3325           has_postdec[mem_mode]
3326             = memory_address_addr_space_p (mem_mode, addr, as);
3327         }
3328       if (HAVE_PRE_INCREMENT)
3329         {
3330           addr = gen_rtx_PRE_INC (address_mode, reg0);
3331           has_preinc[mem_mode]
3332             = memory_address_addr_space_p (mem_mode, addr, as);
3333         }
3334       if (HAVE_POST_INCREMENT)
3335         {
3336           addr = gen_rtx_POST_INC (address_mode, reg0);
3337           has_postinc[mem_mode]
3338             = memory_address_addr_space_p (mem_mode, addr, as);
3339         }
3340       for (i = 0; i < 16; i++)
3341         {
3342           sym_p = i & 1;
3343           var_p = (i >> 1) & 1;
3344           off_p = (i >> 2) & 1;
3345           rat_p = (i >> 3) & 1;
3346
3347           addr = reg0;
3348           if (rat_p)
3349             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3350                                    gen_int_mode (rat, address_mode));
3351
3352           if (var_p)
3353             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3354
3355           if (sym_p)
3356             {
3357               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3358               /* ??? We can run into trouble with some backends by presenting
3359                  it with symbols which haven't been properly passed through
3360                  targetm.encode_section_info.  By setting the local bit, we
3361                  enhance the probability of things working.  */
3362               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3363
3364               if (off_p)
3365                 base = gen_rtx_fmt_e (CONST, address_mode,
3366                                       gen_rtx_fmt_ee
3367                                         (PLUS, address_mode, base,
3368                                          gen_int_mode (off, address_mode)));
3369             }
3370           else if (off_p)
3371             base = gen_int_mode (off, address_mode);
3372           else
3373             base = NULL_RTX;
3374
3375           if (base)
3376             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3377
3378           start_sequence ();
3379           /* To avoid splitting addressing modes, pretend that no cse will
3380              follow.  */
3381           old_cse_not_expected = cse_not_expected;
3382           cse_not_expected = true;
3383           addr = memory_address_addr_space (mem_mode, addr, as);
3384           cse_not_expected = old_cse_not_expected;
3385           seq = get_insns ();
3386           end_sequence ();
3387
3388           acost = seq_cost (seq, speed);
3389           acost += address_cost (addr, mem_mode, as, speed);
3390
3391           if (!acost)
3392             acost = 1;
3393           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3394         }
3395
3396       /* On some targets, it is quite expensive to load symbol to a register,
3397          which makes addresses that contain symbols look much more expensive.
3398          However, the symbol will have to be loaded in any case before the
3399          loop (and quite likely we have it in register already), so it does not
3400          make much sense to penalize them too heavily.  So make some final
3401          tweaks for the SYMBOL_PRESENT modes:
3402
3403          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3404          var is cheaper, use this mode with small penalty.
3405          If VAR_PRESENT is true, try whether the mode with
3406          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3407          if this is the case, use it.  */
3408       add_c = add_cost (address_mode, speed);
3409       for (i = 0; i < 8; i++)
3410         {
3411           var_p = i & 1;
3412           off_p = (i >> 1) & 1;
3413           rat_p = (i >> 2) & 1;
3414
3415           acost = data->costs[0][1][off_p][rat_p] + 1;
3416           if (var_p)
3417             acost += add_c;
3418
3419           if (acost < data->costs[1][var_p][off_p][rat_p])
3420             data->costs[1][var_p][off_p][rat_p] = acost;
3421         }
3422
3423       if (dump_file && (dump_flags & TDF_DETAILS))
3424         {
3425           fprintf (dump_file, "Address costs:\n");
3426
3427           for (i = 0; i < 16; i++)
3428             {
3429               sym_p = i & 1;
3430               var_p = (i >> 1) & 1;
3431               off_p = (i >> 2) & 1;
3432               rat_p = (i >> 3) & 1;
3433
3434               fprintf (dump_file, "  ");
3435               if (sym_p)
3436                 fprintf (dump_file, "sym + ");
3437               if (var_p)
3438                 fprintf (dump_file, "var + ");
3439               if (off_p)
3440                 fprintf (dump_file, "cst + ");
3441               if (rat_p)
3442                 fprintf (dump_file, "rat * ");
3443
3444               acost = data->costs[sym_p][var_p][off_p][rat_p];
3445               fprintf (dump_file, "index costs %d\n", acost);
3446             }
3447           if (has_predec[mem_mode] || has_postdec[mem_mode]
3448               || has_preinc[mem_mode] || has_postinc[mem_mode])
3449             fprintf (dump_file, "  May include autoinc/dec\n");
3450           fprintf (dump_file, "\n");
3451         }
3452
3453       VEC_replace (address_cost_data, address_cost_data_list,
3454                    data_index, data);
3455     }
3456
3457   bits = GET_MODE_BITSIZE (address_mode);
3458   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3459   offset &= mask;
3460   if ((offset >> (bits - 1) & 1))
3461     offset |= ~mask;
3462   s_offset = offset;
3463
3464   autoinc = false;
3465   msize = GET_MODE_SIZE (mem_mode);
3466   autoinc_offset = offset;
3467   if (stmt_after_inc)
3468     autoinc_offset += ratio * cstep;
3469   if (symbol_present || var_present || ratio != 1)
3470     autoinc = false;
3471   else if ((has_postinc[mem_mode] && autoinc_offset == 0
3472                && msize == cstep)
3473            || (has_postdec[mem_mode] && autoinc_offset == 0
3474                && msize == -cstep)
3475            || (has_preinc[mem_mode] && autoinc_offset == msize
3476                && msize == cstep)
3477            || (has_predec[mem_mode] && autoinc_offset == -msize
3478                && msize == -cstep))
3479     autoinc = true;
3480
3481   cost = 0;
3482   offset_p = (s_offset != 0
3483               && data->min_offset <= s_offset
3484               && s_offset <= data->max_offset);
3485   ratio_p = (ratio != 1
3486              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3487
3488   if (ratio != 1 && !ratio_p)
3489     cost += multiply_by_cost (ratio, address_mode, speed);
3490
3491   if (s_offset && !offset_p && !symbol_present)
3492     cost += add_cost (address_mode, speed);
3493
3494   if (may_autoinc)
3495     *may_autoinc = autoinc;
3496   acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3497   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3498   return new_cost (cost + acost, complexity);
3499 }
3500
3501 /* Estimates cost of forcing expression EXPR into a variable.  */
3502
3503 static comp_cost
3504 force_expr_to_var_cost (tree expr, bool speed)
3505 {
3506   static bool costs_initialized = false;
3507   static unsigned integer_cost [2];
3508   static unsigned symbol_cost [2];
3509   static unsigned address_cost [2];
3510   tree op0, op1;
3511   comp_cost cost0, cost1, cost;
3512   enum machine_mode mode;
3513
3514   if (!costs_initialized)
3515     {
3516       tree type = build_pointer_type (integer_type_node);
3517       tree var, addr;
3518       rtx x;
3519       int i;
3520
3521       var = create_tmp_var_raw (integer_type_node, "test_var");
3522       TREE_STATIC (var) = 1;
3523       x = produce_memory_decl_rtl (var, NULL);
3524       SET_DECL_RTL (var, x);
3525
3526       addr = build1 (ADDR_EXPR, type, var);
3527
3528
3529       for (i = 0; i < 2; i++)
3530         {
3531           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3532                                                              2000), i);
3533
3534           symbol_cost[i] = computation_cost (addr, i) + 1;
3535
3536           address_cost[i]
3537             = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3538                                         addr,
3539                                         build_int_cst (sizetype, 2000)), i) + 1;
3540           if (dump_file && (dump_flags & TDF_DETAILS))
3541             {
3542               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3543               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3544               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3545               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3546               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3547               fprintf (dump_file, "\n");
3548             }
3549         }
3550
3551       costs_initialized = true;
3552     }
3553
3554   STRIP_NOPS (expr);
3555
3556   if (SSA_VAR_P (expr))
3557     return zero_cost;
3558
3559   if (is_gimple_min_invariant (expr))
3560     {
3561       if (TREE_CODE (expr) == INTEGER_CST)
3562         return new_cost (integer_cost [speed], 0);
3563
3564       if (TREE_CODE (expr) == ADDR_EXPR)
3565         {
3566           tree obj = TREE_OPERAND (expr, 0);
3567
3568           if (TREE_CODE (obj) == VAR_DECL
3569               || TREE_CODE (obj) == PARM_DECL
3570               || TREE_CODE (obj) == RESULT_DECL)
3571             return new_cost (symbol_cost [speed], 0);
3572         }
3573
3574       return new_cost (address_cost [speed], 0);
3575     }
3576
3577   switch (TREE_CODE (expr))
3578     {
3579     case POINTER_PLUS_EXPR:
3580     case PLUS_EXPR:
3581     case MINUS_EXPR:
3582     case MULT_EXPR:
3583       op0 = TREE_OPERAND (expr, 0);
3584       op1 = TREE_OPERAND (expr, 1);
3585       STRIP_NOPS (op0);
3586       STRIP_NOPS (op1);
3587
3588       if (is_gimple_val (op0))
3589         cost0 = zero_cost;
3590       else
3591         cost0 = force_expr_to_var_cost (op0, speed);
3592
3593       if (is_gimple_val (op1))
3594         cost1 = zero_cost;
3595       else
3596         cost1 = force_expr_to_var_cost (op1, speed);
3597
3598       break;
3599
3600     case NEGATE_EXPR:
3601       op0 = TREE_OPERAND (expr, 0);
3602       STRIP_NOPS (op0);
3603       op1 = NULL_TREE;
3604
3605       if (is_gimple_val (op0))
3606         cost0 = zero_cost;
3607       else
3608         cost0 = force_expr_to_var_cost (op0, speed);
3609
3610       cost1 = zero_cost;
3611       break;
3612
3613     default:
3614       /* Just an arbitrary value, FIXME.  */
3615       return new_cost (target_spill_cost[speed], 0);
3616     }
3617
3618   mode = TYPE_MODE (TREE_TYPE (expr));
3619   switch (TREE_CODE (expr))
3620     {
3621     case POINTER_PLUS_EXPR:
3622     case PLUS_EXPR:
3623     case MINUS_EXPR:
3624     case NEGATE_EXPR:
3625       cost = new_cost (add_cost (mode, speed), 0);
3626       break;
3627
3628     case MULT_EXPR:
3629       if (cst_and_fits_in_hwi (op0))
3630         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3631       else if (cst_and_fits_in_hwi (op1))
3632         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3633       else
3634         return new_cost (target_spill_cost [speed], 0);
3635       break;
3636
3637     default:
3638       gcc_unreachable ();
3639     }
3640
3641   cost = add_costs (cost, cost0);
3642   cost = add_costs (cost, cost1);
3643
3644   /* Bound the cost by target_spill_cost.  The parts of complicated
3645      computations often are either loop invariant or at least can
3646      be shared between several iv uses, so letting this grow without
3647      limits would not give reasonable results.  */
3648   if (cost.cost > (int) target_spill_cost [speed])
3649     cost.cost = target_spill_cost [speed];
3650
3651   return cost;
3652 }
3653
3654 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3655    invariants the computation depends on.  */
3656
3657 static comp_cost
3658 force_var_cost (struct ivopts_data *data,
3659                 tree expr, bitmap *depends_on)
3660 {
3661   if (depends_on)
3662     {
3663       fd_ivopts_data = data;
3664       walk_tree (&expr, find_depends, depends_on, NULL);
3665     }
3666
3667   return force_expr_to_var_cost (expr, data->speed);
3668 }
3669
3670 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3671    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3672    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3673    invariants the computation depends on.  */
3674
3675 static comp_cost
3676 split_address_cost (struct ivopts_data *data,
3677                     tree addr, bool *symbol_present, bool *var_present,
3678                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3679 {
3680   tree core;
3681   HOST_WIDE_INT bitsize;
3682   HOST_WIDE_INT bitpos;
3683   tree toffset;
3684   enum machine_mode mode;
3685   int unsignedp, volatilep;
3686
3687   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3688                               &unsignedp, &volatilep, false);
3689
3690   if (toffset != 0
3691       || bitpos % BITS_PER_UNIT != 0
3692       || TREE_CODE (core) != VAR_DECL)
3693     {
3694       *symbol_present = false;
3695       *var_present = true;
3696       fd_ivopts_data = data;
3697       walk_tree (&addr, find_depends, depends_on, NULL);
3698       return new_cost (target_spill_cost[data->speed], 0);
3699     }
3700
3701   *offset += bitpos / BITS_PER_UNIT;
3702   if (TREE_STATIC (core)
3703       || DECL_EXTERNAL (core))
3704     {
3705       *symbol_present = true;
3706       *var_present = false;
3707       return zero_cost;
3708     }
3709
3710   *symbol_present = false;
3711   *var_present = true;
3712   return zero_cost;
3713 }
3714
3715 /* Estimates cost of expressing difference of addresses E1 - E2 as
3716    var + symbol + offset.  The value of offset is added to OFFSET,
3717    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3718    part is missing.  DEPENDS_ON is a set of the invariants the computation
3719    depends on.  */
3720
3721 static comp_cost
3722 ptr_difference_cost (struct ivopts_data *data,
3723                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3724                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3725 {
3726   HOST_WIDE_INT diff = 0;
3727   aff_tree aff_e1, aff_e2;
3728   tree type;
3729
3730   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3731
3732   if (ptr_difference_const (e1, e2, &diff))
3733     {
3734       *offset += diff;
3735       *symbol_present = false;
3736       *var_present = false;
3737       return zero_cost;
3738     }
3739
3740   if (integer_zerop (e2))
3741     return split_address_cost (data, TREE_OPERAND (e1, 0),
3742                                symbol_present, var_present, offset, depends_on);
3743
3744   *symbol_present = false;
3745   *var_present = true;
3746
3747   type = signed_type_for (TREE_TYPE (e1));
3748   tree_to_aff_combination (e1, type, &aff_e1);
3749   tree_to_aff_combination (e2, type, &aff_e2);
3750   aff_combination_scale (&aff_e2, double_int_minus_one);
3751   aff_combination_add (&aff_e1, &aff_e2);
3752
3753   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3754 }
3755
3756 /* Estimates cost of expressing difference E1 - E2 as
3757    var + symbol + offset.  The value of offset is added to OFFSET,
3758    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3759    part is missing.  DEPENDS_ON is a set of the invariants the computation
3760    depends on.  */
3761
3762 static comp_cost
3763 difference_cost (struct ivopts_data *data,
3764                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3765                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3766 {
3767   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3768   unsigned HOST_WIDE_INT off1, off2;
3769   aff_tree aff_e1, aff_e2;
3770   tree type;
3771
3772   e1 = strip_offset (e1, &off1);
3773   e2 = strip_offset (e2, &off2);
3774   *offset += off1 - off2;
3775
3776   STRIP_NOPS (e1);
3777   STRIP_NOPS (e2);
3778
3779   if (TREE_CODE (e1) == ADDR_EXPR)
3780     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3781                                 offset, depends_on);
3782   *symbol_present = false;
3783
3784   if (operand_equal_p (e1, e2, 0))
3785     {
3786       *var_present = false;
3787       return zero_cost;
3788     }
3789
3790   *var_present = true;
3791
3792   if (integer_zerop (e2))
3793     return force_var_cost (data, e1, depends_on);
3794
3795   if (integer_zerop (e1))
3796     {
3797       comp_cost cost = force_var_cost (data, e2, depends_on);
3798       cost.cost += multiply_by_cost (-1, mode, data->speed);
3799       return cost;
3800     }
3801
3802   type = signed_type_for (TREE_TYPE (e1));
3803   tree_to_aff_combination (e1, type, &aff_e1);
3804   tree_to_aff_combination (e2, type, &aff_e2);
3805   aff_combination_scale (&aff_e2, double_int_minus_one);
3806   aff_combination_add (&aff_e1, &aff_e2);
3807
3808   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3809 }
3810
3811 /* Returns true if AFF1 and AFF2 are identical.  */
3812
3813 static bool
3814 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3815 {
3816   unsigned i;
3817
3818   if (aff1->n != aff2->n)
3819     return false;
3820
3821   for (i = 0; i < aff1->n; i++)
3822     {
3823       if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3824         return false;
3825
3826       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3827         return false;
3828     }
3829   return true;
3830 }
3831
3832 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3833    requires a new compiler generated temporary.  Returns -1 otherwise.
3834    ADDRESS_P is a flag indicating if the expression is for address
3835    computation.  */
3836
3837 static int
3838 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3839                             tree cbase, HOST_WIDE_INT ratio,
3840                             bool address_p)
3841 {
3842   aff_tree ubase_aff, cbase_aff;
3843   tree expr, ub, cb;
3844   struct iv_inv_expr_ent ent;
3845   struct iv_inv_expr_ent **slot;
3846
3847   STRIP_NOPS (ubase);
3848   STRIP_NOPS (cbase);
3849   ub = ubase;
3850   cb = cbase;
3851
3852   if ((TREE_CODE (ubase) == INTEGER_CST)
3853       && (TREE_CODE (cbase) == INTEGER_CST))
3854     return -1;
3855
3856   /* Strips the constant part. */
3857   if (TREE_CODE (ubase) == PLUS_EXPR
3858       || TREE_CODE (ubase) == MINUS_EXPR
3859       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3860     {
3861       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3862         ubase = TREE_OPERAND (ubase, 0);
3863     }
3864
3865   /* Strips the constant part. */
3866   if (TREE_CODE (cbase) == PLUS_EXPR
3867       || TREE_CODE (cbase) == MINUS_EXPR
3868       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3869     {
3870       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3871         cbase = TREE_OPERAND (cbase, 0);
3872     }
3873
3874   if (address_p)
3875     {
3876       if (((TREE_CODE (ubase) == SSA_NAME)
3877            || (TREE_CODE (ubase) == ADDR_EXPR
3878                && is_gimple_min_invariant (ubase)))
3879           && (TREE_CODE (cbase) == INTEGER_CST))
3880         return -1;
3881
3882       if (((TREE_CODE (cbase) == SSA_NAME)
3883            || (TREE_CODE (cbase) == ADDR_EXPR
3884                && is_gimple_min_invariant (cbase)))
3885           && (TREE_CODE (ubase) == INTEGER_CST))
3886         return -1;
3887     }
3888
3889   if (ratio == 1)
3890     {
3891       if(operand_equal_p (ubase, cbase, 0))
3892         return -1;
3893
3894       if (TREE_CODE (ubase) == ADDR_EXPR
3895           && TREE_CODE (cbase) == ADDR_EXPR)
3896         {
3897           tree usym, csym;
3898
3899           usym = TREE_OPERAND (ubase, 0);
3900           csym = TREE_OPERAND (cbase, 0);
3901           if (TREE_CODE (usym) == ARRAY_REF)
3902             {
3903               tree ind = TREE_OPERAND (usym, 1);
3904               if (TREE_CODE (ind) == INTEGER_CST
3905                   && host_integerp (ind, 0)
3906                   && TREE_INT_CST_LOW (ind) == 0)
3907                 usym = TREE_OPERAND (usym, 0);
3908             }
3909           if (TREE_CODE (csym) == ARRAY_REF)
3910             {
3911               tree ind = TREE_OPERAND (csym, 1);
3912               if (TREE_CODE (ind) == INTEGER_CST
3913                   && host_integerp (ind, 0)
3914                   && TREE_INT_CST_LOW (ind) == 0)
3915                 csym = TREE_OPERAND (csym, 0);
3916             }
3917           if (operand_equal_p (usym, csym, 0))
3918             return -1;
3919         }
3920       /* Now do more complex comparison  */
3921       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3922       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3923       if (compare_aff_trees (&ubase_aff, &cbase_aff))
3924         return -1;
3925     }
3926
3927   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3928   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3929
3930   aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3931   aff_combination_add (&ubase_aff, &cbase_aff);
3932   expr = aff_combination_to_tree (&ubase_aff);
3933   ent.expr = expr;
3934   ent.hash = iterative_hash_expr (expr, 0);
3935   slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3936                                                      &ent, INSERT);
3937   if (*slot)
3938     return (*slot)->id;
3939
3940   *slot = XNEW (struct iv_inv_expr_ent);
3941   (*slot)->expr = expr;
3942   (*slot)->hash = ent.hash;
3943   (*slot)->id = data->inv_expr_id++;
3944   return  (*slot)->id;
3945 }
3946
3947
3948
3949 /* Determines the cost of the computation by that USE is expressed
3950    from induction variable CAND.  If ADDRESS_P is true, we just need
3951    to create an address from it, otherwise we want to get it into
3952    register.  A set of invariants we depend on is stored in
3953    DEPENDS_ON.  AT is the statement at that the value is computed.
3954    If CAN_AUTOINC is nonnull, use it to record whether autoinc
3955    addressing is likely.  */
3956
3957 static comp_cost
3958 get_computation_cost_at (struct ivopts_data *data,
3959                          struct iv_use *use, struct iv_cand *cand,
3960                          bool address_p, bitmap *depends_on, gimple at,
3961                          bool *can_autoinc,
3962                          int *inv_expr_id)
3963 {
3964   tree ubase = use->iv->base, ustep = use->iv->step;
3965   tree cbase, cstep;
3966   tree utype = TREE_TYPE (ubase), ctype;
3967   unsigned HOST_WIDE_INT cstepi, offset = 0;
3968   HOST_WIDE_INT ratio, aratio;
3969   bool var_present, symbol_present, stmt_is_after_inc;
3970   comp_cost cost;
3971   double_int rat;
3972   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3973
3974   *depends_on = NULL;
3975
3976   /* Only consider real candidates.  */
3977   if (!cand->iv)
3978     return infinite_cost;
3979
3980   cbase = cand->iv->base;
3981   cstep = cand->iv->step;
3982   ctype = TREE_TYPE (cbase);
3983
3984   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3985     {
3986       /* We do not have a precision to express the values of use.  */
3987       return infinite_cost;
3988     }
3989
3990   if (address_p)
3991     {
3992       /* Do not try to express address of an object with computation based
3993          on address of a different object.  This may cause problems in rtl
3994          level alias analysis (that does not expect this to be happening,
3995          as this is illegal in C), and would be unlikely to be useful
3996          anyway.  */
3997       if (use->iv->base_object
3998           && cand->iv->base_object
3999           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4000         return infinite_cost;
4001     }
4002
4003   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4004     {
4005       /* TODO -- add direct handling of this case.  */
4006       goto fallback;
4007     }
4008
4009   /* CSTEPI is removed from the offset in case statement is after the
4010      increment.  If the step is not constant, we use zero instead.
4011      This is a bit imprecise (there is the extra addition), but
4012      redundancy elimination is likely to transform the code so that
4013      it uses value of the variable before increment anyway,
4014      so it is not that much unrealistic.  */
4015   if (cst_and_fits_in_hwi (cstep))
4016     cstepi = int_cst_value (cstep);
4017   else
4018     cstepi = 0;
4019
4020   if (!constant_multiple_of (ustep, cstep, &rat))
4021     return infinite_cost;
4022
4023   if (double_int_fits_in_shwi_p (rat))
4024     ratio = double_int_to_shwi (rat);
4025   else
4026     return infinite_cost;
4027
4028   STRIP_NOPS (cbase);
4029   ctype = TREE_TYPE (cbase);
4030
4031   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4032
4033   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4034      or ratio == 1, it is better to handle this like
4035
4036      ubase - ratio * cbase + ratio * var
4037
4038      (also holds in the case ratio == -1, TODO.  */
4039
4040   if (cst_and_fits_in_hwi (cbase))
4041     {
4042       offset = - ratio * int_cst_value (cbase);
4043       cost = difference_cost (data,
4044                               ubase, build_int_cst (utype, 0),
4045                               &symbol_present, &var_present, &offset,
4046                               depends_on);
4047       cost.cost /= avg_loop_niter (data->current_loop);
4048     }
4049   else if (ratio == 1)
4050     {
4051       tree real_cbase = cbase;
4052
4053       /* Check to see if any adjustment is needed.  */
4054       if (cstepi == 0 && stmt_is_after_inc)
4055         {
4056           aff_tree real_cbase_aff;
4057           aff_tree cstep_aff;
4058
4059           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4060                                    &real_cbase_aff);
4061           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4062
4063           aff_combination_add (&real_cbase_aff, &cstep_aff);
4064           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4065         }
4066
4067       cost = difference_cost (data,
4068                               ubase, real_cbase,
4069                               &symbol_present, &var_present, &offset,
4070                               depends_on);
4071       cost.cost /= avg_loop_niter (data->current_loop);
4072     }
4073   else if (address_p
4074            && !POINTER_TYPE_P (ctype)
4075            && multiplier_allowed_in_address_p
4076                 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4077                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4078     {
4079       cbase
4080         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4081       cost = difference_cost (data,
4082                               ubase, cbase,
4083                               &symbol_present, &var_present, &offset,
4084                               depends_on);
4085       cost.cost /= avg_loop_niter (data->current_loop);
4086     }
4087   else
4088     {
4089       cost = force_var_cost (data, cbase, depends_on);
4090       cost = add_costs (cost,
4091                         difference_cost (data,
4092                                          ubase, build_int_cst (utype, 0),
4093                                          &symbol_present, &var_present,
4094                                          &offset, depends_on));
4095       cost.cost /= avg_loop_niter (data->current_loop);
4096       cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4097     }
4098
4099   if (inv_expr_id)
4100     {
4101       *inv_expr_id =
4102           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4103       /* Clear depends on.  */
4104       if (*inv_expr_id != -1 && depends_on && *depends_on)
4105         bitmap_clear (*depends_on);
4106     }
4107
4108   /* If we are after the increment, the value of the candidate is higher by
4109      one iteration.  */
4110   if (stmt_is_after_inc)
4111     offset -= ratio * cstepi;
4112
4113   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4114      (symbol/var1/const parts may be omitted).  If we are looking for an
4115      address, find the cost of addressing this.  */
4116   if (address_p)
4117     return add_costs (cost,
4118                       get_address_cost (symbol_present, var_present,
4119                                         offset, ratio, cstepi,
4120                                         TYPE_MODE (TREE_TYPE (utype)),
4121                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4122                                         speed, stmt_is_after_inc,
4123                                         can_autoinc));
4124
4125   /* Otherwise estimate the costs for computing the expression.  */
4126   if (!symbol_present && !var_present && !offset)
4127     {
4128       if (ratio != 1)
4129         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4130       return cost;
4131     }
4132
4133   /* Symbol + offset should be compile-time computable so consider that they
4134       are added once to the variable, if present.  */
4135   if (var_present && (symbol_present || offset))
4136     cost.cost += adjust_setup_cost (data,
4137                                     add_cost (TYPE_MODE (ctype), speed));
4138
4139   /* Having offset does not affect runtime cost in case it is added to
4140      symbol, but it increases complexity.  */
4141   if (offset)
4142     cost.complexity++;
4143
4144   cost.cost += add_cost (TYPE_MODE (ctype), speed);
4145
4146   aratio = ratio > 0 ? ratio : -ratio;
4147   if (aratio != 1)
4148     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4149   return cost;
4150
4151 fallback:
4152   if (can_autoinc)
4153     *can_autoinc = false;
4154
4155   {
4156     /* Just get the expression, expand it and measure the cost.  */
4157     tree comp = get_computation_at (data->current_loop, use, cand, at);
4158
4159     if (!comp)
4160       return infinite_cost;
4161
4162     if (address_p)
4163       comp = build_simple_mem_ref (comp);
4164
4165     return new_cost (computation_cost (comp, speed), 0);
4166   }
4167 }
4168
4169 /* Determines the cost of the computation by that USE is expressed
4170    from induction variable CAND.  If ADDRESS_P is true, we just need
4171    to create an address from it, otherwise we want to get it into
4172    register.  A set of invariants we depend on is stored in
4173    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4174    autoinc addressing is likely.  */
4175
4176 static comp_cost
4177 get_computation_cost (struct ivopts_data *data,
4178                       struct iv_use *use, struct iv_cand *cand,
4179                       bool address_p, bitmap *depends_on,
4180                       bool *can_autoinc, int *inv_expr_id)
4181 {
4182   return get_computation_cost_at (data,
4183                                   use, cand, address_p, depends_on, use->stmt,
4184                                   can_autoinc, inv_expr_id);
4185 }
4186
4187 /* Determines cost of basing replacement of USE on CAND in a generic
4188    expression.  */
4189
4190 static bool
4191 determine_use_iv_cost_generic (struct ivopts_data *data,
4192                                struct iv_use *use, struct iv_cand *cand)
4193 {
4194   bitmap depends_on;
4195   comp_cost cost;
4196   int inv_expr_id = -1;
4197
4198   /* The simple case first -- if we need to express value of the preserved
4199      original biv, the cost is 0.  This also prevents us from counting the
4200      cost of increment twice -- once at this use and once in the cost of
4201      the candidate.  */
4202   if (cand->pos == IP_ORIGINAL
4203       && cand->incremented_at == use->stmt)
4204     {
4205       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4206       return true;
4207     }
4208
4209   cost = get_computation_cost (data, use, cand, false, &depends_on,
4210                                NULL, &inv_expr_id);
4211
4212   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4213                    inv_expr_id);
4214
4215   return !infinite_cost_p (cost);
4216 }
4217
4218 /* Determines cost of basing replacement of USE on CAND in an address.  */
4219
4220 static bool
4221 determine_use_iv_cost_address (struct ivopts_data *data,
4222                                struct iv_use *use, struct iv_cand *cand)
4223 {
4224   bitmap depends_on;
4225   bool can_autoinc;
4226   int inv_expr_id = -1;
4227   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4228                                          &can_autoinc, &inv_expr_id);
4229
4230   if (cand->ainc_use == use)
4231     {
4232       if (can_autoinc)
4233         cost.cost -= cand->cost_step;
4234       /* If we generated the candidate solely for exploiting autoincrement
4235          opportunities, and it turns out it can't be used, set the cost to
4236          infinity to make sure we ignore it.  */
4237       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4238         cost = infinite_cost;
4239     }
4240   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4241                    inv_expr_id);
4242
4243   return !infinite_cost_p (cost);
4244 }
4245
4246 /* Computes value of candidate CAND at position AT in iteration NITER, and
4247    stores it to VAL.  */
4248
4249 static void
4250 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4251                aff_tree *val)
4252 {
4253   aff_tree step, delta, nit;
4254   struct iv *iv = cand->iv;
4255   tree type = TREE_TYPE (iv->base);
4256   tree steptype = type;
4257   if (POINTER_TYPE_P (type))
4258     steptype = sizetype;
4259
4260   tree_to_aff_combination (iv->step, steptype, &step);
4261   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4262   aff_combination_convert (&nit, steptype);
4263   aff_combination_mult (&nit, &step, &delta);
4264   if (stmt_after_increment (loop, cand, at))
4265     aff_combination_add (&delta, &step);
4266
4267   tree_to_aff_combination (iv->base, type, val);
4268   aff_combination_add (val, &delta);
4269 }
4270
4271 /* Returns period of induction variable iv.  */
4272
4273 static tree
4274 iv_period (struct iv *iv)
4275 {
4276   tree step = iv->step, period, type;
4277   tree pow2div;
4278
4279   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4280
4281   type = unsigned_type_for (TREE_TYPE (step));
4282   /* Period of the iv is lcm (step, type_range)/step -1,
4283      i.e., N*type_range/step - 1. Since type range is power
4284      of two, N == (step >> num_of_ending_zeros_binary (step),
4285      so the final result is
4286
4287        (type_range >> num_of_ending_zeros_binary (step)) - 1
4288
4289   */
4290   pow2div = num_ending_zeros (step);
4291
4292   period = build_low_bits_mask (type,
4293                                 (TYPE_PRECISION (type)
4294                                  - tree_low_cst (pow2div, 1)));
4295
4296   return period;
4297 }
4298
4299 /* Returns the comparison operator used when eliminating the iv USE.  */
4300
4301 static enum tree_code
4302 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4303 {
4304   struct loop *loop = data->current_loop;
4305   basic_block ex_bb;
4306   edge exit;
4307
4308   ex_bb = gimple_bb (use->stmt);
4309   exit = EDGE_SUCC (ex_bb, 0);
4310   if (flow_bb_inside_loop_p (loop, exit->dest))
4311     exit = EDGE_SUCC (ex_bb, 1);
4312
4313   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4314 }
4315
4316 /* Check whether it is possible to express the condition in USE by comparison
4317    of candidate CAND.  If so, store the value compared with to BOUND.  */
4318
4319 static bool
4320 may_eliminate_iv (struct ivopts_data *data,
4321                   struct iv_use *use, struct iv_cand *cand, tree *bound)
4322 {
4323   basic_block ex_bb;
4324   edge exit;
4325   tree nit, period;
4326   struct loop *loop = data->current_loop;
4327   aff_tree bnd;
4328   struct tree_niter_desc *desc = NULL;
4329
4330   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4331     return false;
4332
4333   /* For now works only for exits that dominate the loop latch.
4334      TODO: extend to other conditions inside loop body.  */
4335   ex_bb = gimple_bb (use->stmt);
4336   if (use->stmt != last_stmt (ex_bb)
4337       || gimple_code (use->stmt) != GIMPLE_COND
4338       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4339     return false;
4340
4341   exit = EDGE_SUCC (ex_bb, 0);
4342   if (flow_bb_inside_loop_p (loop, exit->dest))
4343     exit = EDGE_SUCC (ex_bb, 1);
4344   if (flow_bb_inside_loop_p (loop, exit->dest))
4345     return false;
4346
4347   nit = niter_for_exit (data, exit, &desc);
4348   if (!nit)
4349     return false;
4350
4351   /* Determine whether we can use the variable to test the exit condition.
4352      This is the case iff the period of the induction variable is greater
4353      than the number of iterations for which the exit condition is true.  */
4354   period = iv_period (cand->iv);
4355
4356   /* If the number of iterations is constant, compare against it directly.  */
4357   if (TREE_CODE (nit) == INTEGER_CST)
4358     {
4359       /* See cand_value_at.  */
4360       if (stmt_after_increment (loop, cand, use->stmt))
4361         {
4362           if (!tree_int_cst_lt (nit, period))
4363             return false;
4364         }
4365       else
4366         {
4367           if (tree_int_cst_lt (period, nit))
4368             return false;
4369         }
4370     }
4371
4372   /* If not, and if this is the only possible exit of the loop, see whether
4373      we can get a conservative estimate on the number of iterations of the
4374      entire loop and compare against that instead.  */
4375   else
4376     {
4377       double_int period_value, max_niter;
4378
4379       max_niter = desc->max;
4380       if (stmt_after_increment (loop, cand, use->stmt))
4381         max_niter = double_int_add (max_niter, double_int_one);
4382       period_value = tree_to_double_int (period);
4383       if (double_int_ucmp (max_niter, period_value) > 0)
4384         {
4385           /* See if we can take advantage of infered loop bound information.  */
4386           if (loop_only_exit_p (loop, exit))
4387             {
4388               if (!estimated_loop_iterations (loop, true, &max_niter))
4389                 return false;
4390               /* The loop bound is already adjusted by adding 1.  */
4391               if (double_int_ucmp (max_niter, period_value) > 0)
4392                 return false;
4393             }
4394           else
4395             return false;
4396         }
4397     }
4398
4399   cand_value_at (loop, cand, use->stmt, nit, &bnd);
4400
4401   *bound = aff_combination_to_tree (&bnd);
4402   /* It is unlikely that computing the number of iterations using division
4403      would be more profitable than keeping the original induction variable.  */
4404   if (expression_expensive_p (*bound))
4405     return false;
4406   return true;
4407 }
4408
4409
4410 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4411
4412 static bool
4413 determine_use_iv_cost_condition (struct ivopts_data *data,
4414                                  struct iv_use *use, struct iv_cand *cand)
4415 {
4416   tree bound = NULL_TREE;
4417   struct iv *cmp_iv;
4418   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4419   comp_cost elim_cost, express_cost, cost;
4420   bool ok;
4421   int inv_expr_id = -1;
4422   tree *control_var, *bound_cst;
4423
4424   /* Only consider real candidates.  */
4425   if (!cand->iv)
4426     {
4427       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4428       return false;
4429     }
4430
4431   /* Try iv elimination.  */
4432   if (may_eliminate_iv (data, use, cand, &bound))
4433     {
4434       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4435       /* The bound is a loop invariant, so it will be only computed
4436          once.  */
4437       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4438     }
4439   else
4440     elim_cost = infinite_cost;
4441
4442   /* Try expressing the original giv.  If it is compared with an invariant,
4443      note that we cannot get rid of it.  */
4444   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4445                               NULL, &cmp_iv);
4446   gcc_assert (ok);
4447
4448   /* When the condition is a comparison of the candidate IV against
4449      zero, prefer this IV.
4450
4451      TODO: The constant that we're substracting from the cost should
4452      be target-dependent.  This information should be added to the
4453      target costs for each backend.  */
4454   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4455       && integer_zerop (*bound_cst)
4456       && (operand_equal_p (*control_var, cand->var_after, 0)
4457           || operand_equal_p (*control_var, cand->var_before, 0)))
4458     elim_cost.cost -= 1;
4459
4460   express_cost = get_computation_cost (data, use, cand, false,
4461                                        &depends_on_express, NULL,
4462                                        &inv_expr_id);
4463   fd_ivopts_data = data;
4464   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4465
4466   /* Choose the better approach, preferring the eliminated IV. */
4467   if (compare_costs (elim_cost, express_cost) <= 0)
4468     {
4469       cost = elim_cost;
4470       depends_on = depends_on_elim;
4471       depends_on_elim = NULL;
4472     }
4473   else
4474     {
4475       cost = express_cost;
4476       depends_on = depends_on_express;
4477       depends_on_express = NULL;
4478       bound = NULL_TREE;
4479     }
4480
4481   set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4482
4483   if (depends_on_elim)
4484     BITMAP_FREE (depends_on_elim);
4485   if (depends_on_express)
4486     BITMAP_FREE (depends_on_express);
4487
4488   return !infinite_cost_p (cost);
4489 }
4490
4491 /* Determines cost of basing replacement of USE on CAND.  Returns false
4492    if USE cannot be based on CAND.  */
4493
4494 static bool
4495 determine_use_iv_cost (struct ivopts_data *data,
4496                        struct iv_use *use, struct iv_cand *cand)
4497 {
4498   switch (use->type)
4499     {
4500     case USE_NONLINEAR_EXPR:
4501       return determine_use_iv_cost_generic (data, use, cand);
4502
4503     case USE_ADDRESS:
4504       return determine_use_iv_cost_address (data, use, cand);
4505
4506     case USE_COMPARE:
4507       return determine_use_iv_cost_condition (data, use, cand);
4508
4509     default:
4510       gcc_unreachable ();
4511     }
4512 }
4513
4514 /* Return true if get_computation_cost indicates that autoincrement is
4515    a possibility for the pair of USE and CAND, false otherwise.  */
4516
4517 static bool
4518 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4519                            struct iv_cand *cand)
4520 {
4521   bitmap depends_on;
4522   bool can_autoinc;
4523   comp_cost cost;
4524
4525   if (use->type != USE_ADDRESS)
4526     return false;
4527
4528   cost = get_computation_cost (data, use, cand, true, &depends_on,
4529                                &can_autoinc, NULL);
4530
4531   BITMAP_FREE (depends_on);
4532
4533   return !infinite_cost_p (cost) && can_autoinc;
4534 }
4535
4536 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4537    use that allows autoincrement, and set their AINC_USE if possible.  */
4538
4539 static void
4540 set_autoinc_for_original_candidates (struct ivopts_data *data)
4541 {
4542   unsigned i, j;
4543
4544   for (i = 0; i < n_iv_cands (data); i++)
4545     {
4546       struct iv_cand *cand = iv_cand (data, i);
4547       struct iv_use *closest = NULL;
4548       if (cand->pos != IP_ORIGINAL)
4549         continue;
4550       for (j = 0; j < n_iv_uses (data); j++)
4551         {
4552           struct iv_use *use = iv_use (data, j);
4553           unsigned uid = gimple_uid (use->stmt);
4554           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4555               || uid > gimple_uid (cand->incremented_at))
4556             continue;
4557           if (closest == NULL || uid > gimple_uid (closest->stmt))
4558             closest = use;
4559         }
4560       if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4561         continue;
4562       cand->ainc_use = closest;
4563     }
4564 }
4565
4566 /* Finds the candidates for the induction variables.  */
4567
4568 static void
4569 find_iv_candidates (struct ivopts_data *data)
4570 {
4571   /* Add commonly used ivs.  */
4572   add_standard_iv_candidates (data);
4573
4574   /* Add old induction variables.  */
4575   add_old_ivs_candidates (data);
4576
4577   /* Add induction variables derived from uses.  */
4578   add_derived_ivs_candidates (data);
4579
4580   set_autoinc_for_original_candidates (data);
4581
4582   /* Record the important candidates.  */
4583   record_important_candidates (data);
4584 }
4585
4586 /* Determines costs of basing the use of the iv on an iv candidate.  */
4587
4588 static void
4589 determine_use_iv_costs (struct ivopts_data *data)
4590 {
4591   unsigned i, j;
4592   struct iv_use *use;
4593   struct iv_cand *cand;
4594   bitmap to_clear = BITMAP_ALLOC (NULL);
4595
4596   alloc_use_cost_map (data);
4597
4598   for (i = 0; i < n_iv_uses (data); i++)
4599     {
4600       use = iv_use (data, i);
4601
4602       if (data->consider_all_candidates)
4603         {
4604           for (j = 0; j < n_iv_cands (data); j++)
4605             {
4606               cand = iv_cand (data, j);
4607               determine_use_iv_cost (data, use, cand);
4608             }
4609         }
4610       else
4611         {
4612           bitmap_iterator bi;
4613
4614           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4615             {
4616               cand = iv_cand (data, j);
4617               if (!determine_use_iv_cost (data, use, cand))
4618                 bitmap_set_bit (to_clear, j);
4619             }
4620
4621           /* Remove the candidates for that the cost is infinite from
4622              the list of related candidates.  */
4623           bitmap_and_compl_into (use->related_cands, to_clear);
4624           bitmap_clear (to_clear);
4625         }
4626     }
4627
4628   BITMAP_FREE (to_clear);
4629
4630   if (dump_file && (dump_flags & TDF_DETAILS))
4631     {
4632       fprintf (dump_file, "Use-candidate costs:\n");
4633
4634       for (i = 0; i < n_iv_uses (data); i++)
4635         {
4636           use = iv_use (data, i);
4637
4638           fprintf (dump_file, "Use %d:\n", i);
4639           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
4640           for (j = 0; j < use->n_map_members; j++)
4641             {
4642               if (!use->cost_map[j].cand
4643                   || infinite_cost_p (use->cost_map[j].cost))
4644                 continue;
4645
4646               fprintf (dump_file, "  %d\t%d\t%d\t",
4647                        use->cost_map[j].cand->id,
4648                        use->cost_map[j].cost.cost,
4649                        use->cost_map[j].cost.complexity);
4650               if (use->cost_map[j].depends_on)
4651                 bitmap_print (dump_file,
4652                               use->cost_map[j].depends_on, "","");
4653               if (use->cost_map[j].inv_expr_id != -1)
4654                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4655               fprintf (dump_file, "\n");
4656             }
4657
4658           fprintf (dump_file, "\n");
4659         }
4660       fprintf (dump_file, "\n");
4661     }
4662 }
4663
4664 /* Determines cost of the candidate CAND.  */
4665
4666 static void
4667 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4668 {
4669   comp_cost cost_base;
4670   unsigned cost, cost_step;
4671   tree base;
4672
4673   if (!cand->iv)
4674     {
4675       cand->cost = 0;
4676       return;
4677     }
4678
4679   /* There are two costs associated with the candidate -- its increment
4680      and its initialization.  The second is almost negligible for any loop
4681      that rolls enough, so we take it just very little into account.  */
4682
4683   base = cand->iv->base;
4684   cost_base = force_var_cost (data, base, NULL);
4685   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4686
4687   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4688
4689   /* Prefer the original ivs unless we may gain something by replacing it.
4690      The reason is to make debugging simpler; so this is not relevant for
4691      artificial ivs created by other optimization passes.  */
4692   if (cand->pos != IP_ORIGINAL
4693       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4694     cost++;
4695
4696   /* Prefer not to insert statements into latch unless there are some
4697      already (so that we do not create unnecessary jumps).  */
4698   if (cand->pos == IP_END
4699       && empty_block_p (ip_end_pos (data->current_loop)))
4700     cost++;
4701
4702   cand->cost = cost;
4703   cand->cost_step = cost_step;
4704 }
4705
4706 /* Determines costs of computation of the candidates.  */
4707
4708 static void
4709 determine_iv_costs (struct ivopts_data *data)
4710 {
4711   unsigned i;
4712
4713   if (dump_file && (dump_flags & TDF_DETAILS))
4714     {
4715       fprintf (dump_file, "Candidate costs:\n");
4716       fprintf (dump_file, "  cand\tcost\n");
4717     }
4718
4719   for (i = 0; i < n_iv_cands (data); i++)
4720     {
4721       struct iv_cand *cand = iv_cand (data, i);
4722
4723       determine_iv_cost (data, cand);
4724
4725       if (dump_file && (dump_flags & TDF_DETAILS))
4726         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4727     }
4728
4729   if (dump_file && (dump_flags & TDF_DETAILS))
4730     fprintf (dump_file, "\n");
4731 }
4732
4733 /* Calculates cost for having SIZE induction variables.  */
4734
4735 static unsigned
4736 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4737 {
4738   /* We add size to the cost, so that we prefer eliminating ivs
4739      if possible.  */
4740   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4741                                             data->body_includes_call);
4742 }
4743
4744 /* For each size of the induction variable set determine the penalty.  */
4745
4746 static void
4747 determine_set_costs (struct ivopts_data *data)
4748 {
4749   unsigned j, n;
4750   gimple phi;
4751   gimple_stmt_iterator psi;
4752   tree op;
4753   struct loop *loop = data->current_loop;
4754   bitmap_iterator bi;
4755
4756   if (dump_file && (dump_flags & TDF_DETAILS))
4757     {
4758       fprintf (dump_file, "Global costs:\n");
4759       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4760       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
4761       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
4762       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
4763     }
4764
4765   n = 0;
4766   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4767     {
4768       phi = gsi_stmt (psi);
4769       op = PHI_RESULT (phi);
4770
4771       if (!is_gimple_reg (op))
4772         continue;
4773
4774       if (get_iv (data, op))
4775         continue;
4776
4777       n++;
4778     }
4779
4780   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4781     {
4782       struct version_info *info = ver_info (data, j);
4783
4784       if (info->inv_id && info->has_nonlin_use)
4785         n++;
4786     }
4787
4788   data->regs_used = n;
4789   if (dump_file && (dump_flags & TDF_DETAILS))
4790     fprintf (dump_file, "  regs_used %d\n", n);
4791
4792   if (dump_file && (dump_flags & TDF_DETAILS))
4793     {
4794       fprintf (dump_file, "  cost for size:\n");
4795       fprintf (dump_file, "  ivs\tcost\n");
4796       for (j = 0; j <= 2 * target_avail_regs; j++)
4797         fprintf (dump_file, "  %d\t%d\n", j,
4798                  ivopts_global_cost_for_size (data, j));
4799       fprintf (dump_file, "\n");
4800     }
4801 }
4802
4803 /* Returns true if A is a cheaper cost pair than B.  */
4804
4805 static bool
4806 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4807 {
4808   int cmp;
4809
4810   if (!a)
4811     return false;
4812
4813   if (!b)
4814     return true;
4815
4816   cmp = compare_costs (a->cost, b->cost);
4817   if (cmp < 0)
4818     return true;
4819
4820   if (cmp > 0)
4821     return false;
4822
4823   /* In case the costs are the same, prefer the cheaper candidate.  */
4824   if (a->cand->cost < b->cand->cost)
4825     return true;
4826
4827   return false;
4828 }
4829
4830
4831 /* Returns candidate by that USE is expressed in IVS.  */
4832
4833 static struct cost_pair *
4834 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4835 {
4836   return ivs->cand_for_use[use->id];
4837 }
4838
4839 /* Computes the cost field of IVS structure.  */
4840
4841 static void
4842 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4843 {
4844   comp_cost cost = ivs->cand_use_cost;
4845
4846   cost.cost += ivs->cand_cost;
4847
4848   cost.cost += ivopts_global_cost_for_size (data,
4849                                             ivs->n_regs + ivs->num_used_inv_expr);
4850
4851   ivs->cost = cost;
4852 }
4853
4854 /* Remove invariants in set INVS to set IVS.  */
4855
4856 static void
4857 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4858 {
4859   bitmap_iterator bi;
4860   unsigned iid;
4861
4862   if (!invs)
4863     return;
4864
4865   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4866     {
4867       ivs->n_invariant_uses[iid]--;
4868       if (ivs->n_invariant_uses[iid] == 0)
4869         ivs->n_regs--;
4870     }
4871 }
4872
4873 /* Set USE not to be expressed by any candidate in IVS.  */
4874
4875 static void
4876 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4877                  struct iv_use *use)
4878 {
4879   unsigned uid = use->id, cid;
4880   struct cost_pair *cp;
4881
4882   cp = ivs->cand_for_use[uid];
4883   if (!cp)
4884     return;
4885   cid = cp->cand->id;
4886
4887   ivs->bad_uses++;
4888   ivs->cand_for_use[uid] = NULL;
4889   ivs->n_cand_uses[cid]--;
4890
4891   if (ivs->n_cand_uses[cid] == 0)
4892     {
4893       bitmap_clear_bit (ivs->cands, cid);
4894       /* Do not count the pseudocandidates.  */
4895       if (cp->cand->iv)
4896         ivs->n_regs--;
4897       ivs->n_cands--;
4898       ivs->cand_cost -= cp->cand->cost;
4899
4900       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4901     }
4902
4903   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4904
4905   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4906
4907   if (cp->inv_expr_id != -1)
4908     {
4909       ivs->used_inv_expr[cp->inv_expr_id]--;
4910       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
4911         ivs->num_used_inv_expr--;
4912     }
4913   iv_ca_recount_cost (data, ivs);
4914 }
4915
4916 /* Add invariants in set INVS to set IVS.  */
4917
4918 static void
4919 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4920 {
4921   bitmap_iterator bi;
4922   unsigned iid;
4923
4924   if (!invs)
4925     return;
4926
4927   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4928     {
4929       ivs->n_invariant_uses[iid]++;
4930       if (ivs->n_invariant_uses[iid] == 1)
4931         ivs->n_regs++;
4932     }
4933 }
4934
4935 /* Set cost pair for USE in set IVS to CP.  */
4936
4937 static void
4938 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4939               struct iv_use *use, struct cost_pair *cp)
4940 {
4941   unsigned uid = use->id, cid;
4942
4943   if (ivs->cand_for_use[uid] == cp)
4944     return;
4945
4946   if (ivs->cand_for_use[uid])
4947     iv_ca_set_no_cp (data, ivs, use);
4948
4949   if (cp)
4950     {
4951       cid = cp->cand->id;
4952
4953       ivs->bad_uses--;
4954       ivs->cand_for_use[uid] = cp;
4955       ivs->n_cand_uses[cid]++;
4956       if (ivs->n_cand_uses[cid] == 1)
4957         {
4958           bitmap_set_bit (ivs->cands, cid);
4959           /* Do not count the pseudocandidates.  */
4960           if (cp->cand->iv)
4961             ivs->n_regs++;
4962           ivs->n_cands++;
4963           ivs->cand_cost += cp->cand->cost;
4964
4965           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4966         }
4967
4968       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4969       iv_ca_set_add_invariants (ivs, cp->depends_on);
4970
4971       if (cp->inv_expr_id != -1)
4972         {
4973           ivs->used_inv_expr[cp->inv_expr_id]++;
4974           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
4975             ivs->num_used_inv_expr++;
4976         }
4977       iv_ca_recount_cost (data, ivs);
4978     }
4979 }
4980
4981 /* Extend set IVS by expressing USE by some of the candidates in it
4982    if possible. All important candidates will be considered
4983    if IMPORTANT_CANDIDATES is true.  */
4984
4985 static void
4986 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4987                struct iv_use *use, bool important_candidates)
4988 {
4989   struct cost_pair *best_cp = NULL, *cp;
4990   bitmap_iterator bi;
4991   bitmap cands;
4992   unsigned i;
4993
4994   gcc_assert (ivs->upto >= use->id);
4995
4996   if (ivs->upto == use->id)
4997     {
4998       ivs->upto++;
4999       ivs->bad_uses++;
5000     }
5001
5002   cands = (important_candidates ? data->important_candidates : ivs->cands);
5003   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5004     {
5005       struct iv_cand *cand = iv_cand (data, i);
5006
5007       cp = get_use_iv_cost (data, use, cand);
5008
5009       if (cheaper_cost_pair (cp, best_cp))
5010         best_cp = cp;
5011     }
5012
5013   iv_ca_set_cp (data, ivs, use, best_cp);
5014 }
5015
5016 /* Get cost for assignment IVS.  */
5017
5018 static comp_cost
5019 iv_ca_cost (struct iv_ca *ivs)
5020 {
5021   /* This was a conditional expression but it triggered a bug in
5022      Sun C 5.5.  */
5023   if (ivs->bad_uses)
5024     return infinite_cost;
5025   else
5026     return ivs->cost;
5027 }
5028
5029 /* Returns true if all dependences of CP are among invariants in IVS.  */
5030
5031 static bool
5032 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5033 {
5034   unsigned i;
5035   bitmap_iterator bi;
5036
5037   if (!cp->depends_on)
5038     return true;
5039
5040   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5041     {
5042       if (ivs->n_invariant_uses[i] == 0)
5043         return false;
5044     }
5045
5046   return true;
5047 }
5048
5049 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5050    it before NEXT_CHANGE.  */
5051
5052 static struct iv_ca_delta *
5053 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5054                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5055 {
5056   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5057
5058   change->use = use;
5059   change->old_cp = old_cp;
5060   change->new_cp = new_cp;
5061   change->next_change = next_change;
5062
5063   return change;
5064 }
5065
5066 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5067    are rewritten.  */
5068
5069 static struct iv_ca_delta *
5070 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5071 {
5072   struct iv_ca_delta *last;
5073
5074   if (!l2)
5075     return l1;
5076
5077   if (!l1)
5078     return l2;
5079
5080   for (last = l1; last->next_change; last = last->next_change)
5081     continue;
5082   last->next_change = l2;
5083
5084   return l1;
5085 }
5086
5087 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5088
5089 static struct iv_ca_delta *
5090 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5091 {
5092   struct iv_ca_delta *act, *next, *prev = NULL;
5093   struct cost_pair *tmp;
5094
5095   for (act = delta; act; act = next)
5096     {
5097       next = act->next_change;
5098       act->next_change = prev;
5099       prev = act;
5100
5101       tmp = act->old_cp;
5102       act->old_cp = act->new_cp;
5103       act->new_cp = tmp;
5104     }
5105
5106   return prev;
5107 }
5108
5109 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5110    reverted instead.  */
5111
5112 static void
5113 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5114                     struct iv_ca_delta *delta, bool forward)
5115 {
5116   struct cost_pair *from, *to;
5117   struct iv_ca_delta *act;
5118
5119   if (!forward)
5120     delta = iv_ca_delta_reverse (delta);
5121
5122   for (act = delta; act; act = act->next_change)
5123     {
5124       from = act->old_cp;
5125       to = act->new_cp;
5126       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5127       iv_ca_set_cp (data, ivs, act->use, to);
5128     }
5129
5130   if (!forward)
5131     iv_ca_delta_reverse (delta);
5132 }
5133
5134 /* Returns true if CAND is used in IVS.  */
5135
5136 static bool
5137 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5138 {
5139   return ivs->n_cand_uses[cand->id] > 0;
5140 }
5141
5142 /* Returns number of induction variable candidates in the set IVS.  */
5143
5144 static unsigned
5145 iv_ca_n_cands (struct iv_ca *ivs)
5146 {
5147   return ivs->n_cands;
5148 }
5149
5150 /* Free the list of changes DELTA.  */
5151
5152 static void
5153 iv_ca_delta_free (struct iv_ca_delta **delta)
5154 {
5155   struct iv_ca_delta *act, *next;
5156
5157   for (act = *delta; act; act = next)
5158     {
5159       next = act->next_change;
5160       free (act);
5161     }
5162
5163   *delta = NULL;
5164 }
5165
5166 /* Allocates new iv candidates assignment.  */
5167
5168 static struct iv_ca *
5169 iv_ca_new (struct ivopts_data *data)
5170 {
5171   struct iv_ca *nw = XNEW (struct iv_ca);
5172
5173   nw->upto = 0;
5174   nw->bad_uses = 0;
5175   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5176   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5177   nw->cands = BITMAP_ALLOC (NULL);
5178   nw->n_cands = 0;
5179   nw->n_regs = 0;
5180   nw->cand_use_cost = zero_cost;
5181   nw->cand_cost = 0;
5182   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5183   nw->cost = zero_cost;
5184   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5185   nw->num_used_inv_expr = 0;
5186
5187   return nw;
5188 }
5189
5190 /* Free memory occupied by the set IVS.  */
5191
5192 static void
5193 iv_ca_free (struct iv_ca **ivs)
5194 {
5195   free ((*ivs)->cand_for_use);
5196   free ((*ivs)->n_cand_uses);
5197   BITMAP_FREE ((*ivs)->cands);
5198   free ((*ivs)->n_invariant_uses);
5199   free ((*ivs)->used_inv_expr);
5200   free (*ivs);
5201   *ivs = NULL;
5202 }
5203
5204 /* Dumps IVS to FILE.  */
5205
5206 static void
5207 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5208 {
5209   const char *pref = "  invariants ";
5210   unsigned i;
5211   comp_cost cost = iv_ca_cost (ivs);
5212
5213   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5214   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5215            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5216   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5217
5218    for (i = 0; i < ivs->upto; i++)
5219     {
5220       struct iv_use *use = iv_use (data, i);
5221       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5222       if (cp)
5223         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5224                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5225       else
5226         fprintf (file, "   use:%d --> ??\n", use->id);
5227     }
5228
5229   for (i = 1; i <= data->max_inv_id; i++)
5230     if (ivs->n_invariant_uses[i])
5231       {
5232         fprintf (file, "%s%d", pref, i);
5233         pref = ", ";
5234       }
5235   fprintf (file, "\n\n");
5236 }
5237
5238 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5239    new set, and store differences in DELTA.  Number of induction variables
5240    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5241    the function will try to find a solution with mimimal iv candidates.  */
5242
5243 static comp_cost
5244 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5245               struct iv_cand *cand, struct iv_ca_delta **delta,
5246               unsigned *n_ivs, bool min_ncand)
5247 {
5248   unsigned i;
5249   comp_cost cost;
5250   struct iv_use *use;
5251   struct cost_pair *old_cp, *new_cp;
5252
5253   *delta = NULL;
5254   for (i = 0; i < ivs->upto; i++)
5255     {
5256       use = iv_use (data, i);
5257       old_cp = iv_ca_cand_for_use (ivs, use);
5258
5259       if (old_cp
5260           && old_cp->cand == cand)
5261         continue;
5262
5263       new_cp = get_use_iv_cost (data, use, cand);
5264       if (!new_cp)
5265         continue;
5266
5267       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5268         continue;
5269
5270       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5271         continue;
5272
5273       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5274     }
5275
5276   iv_ca_delta_commit (data, ivs, *delta, true);
5277   cost = iv_ca_cost (ivs);
5278   if (n_ivs)
5279     *n_ivs = iv_ca_n_cands (ivs);
5280   iv_ca_delta_commit (data, ivs, *delta, false);
5281
5282   return cost;
5283 }
5284
5285 /* Try narrowing set IVS by removing CAND.  Return the cost of
5286    the new set and store the differences in DELTA.  */
5287
5288 static comp_cost
5289 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5290               struct iv_cand *cand, struct iv_ca_delta **delta)
5291 {
5292   unsigned i, ci;
5293   struct iv_use *use;
5294   struct cost_pair *old_cp, *new_cp, *cp;
5295   bitmap_iterator bi;
5296   struct iv_cand *cnd;
5297   comp_cost cost;
5298
5299   *delta = NULL;
5300   for (i = 0; i < n_iv_uses (data); i++)
5301     {
5302       use = iv_use (data, i);
5303
5304       old_cp = iv_ca_cand_for_use (ivs, use);
5305       if (old_cp->cand != cand)
5306         continue;
5307
5308       new_cp = NULL;
5309
5310       if (data->consider_all_candidates)
5311         {
5312           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5313             {
5314               if (ci == cand->id)
5315                 continue;
5316
5317               cnd = iv_cand (data, ci);
5318
5319               cp = get_use_iv_cost (data, use, cnd);
5320               if (!cp)
5321                 continue;
5322
5323               if (!iv_ca_has_deps (ivs, cp))
5324                 continue;
5325
5326               if (!cheaper_cost_pair (cp, new_cp))
5327                 continue;
5328
5329               new_cp = cp;
5330             }
5331         }
5332       else
5333         {
5334           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5335             {
5336               if (ci == cand->id)
5337                 continue;
5338
5339               cnd = iv_cand (data, ci);
5340
5341               cp = get_use_iv_cost (data, use, cnd);
5342               if (!cp)
5343                 continue;
5344               if (!iv_ca_has_deps (ivs, cp))
5345                 continue;
5346
5347               if (!cheaper_cost_pair (cp, new_cp))
5348                 continue;
5349
5350               new_cp = cp;
5351             }
5352         }
5353
5354       if (!new_cp)
5355         {
5356           iv_ca_delta_free (delta);
5357           return infinite_cost;
5358         }
5359
5360       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5361     }
5362
5363   iv_ca_delta_commit (data, ivs, *delta, true);
5364   cost = iv_ca_cost (ivs);
5365   iv_ca_delta_commit (data, ivs, *delta, false);
5366
5367   return cost;
5368 }
5369
5370 /* Try optimizing the set of candidates IVS by removing candidates different
5371    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5372    differences in DELTA.  */
5373
5374 static comp_cost
5375 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5376              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5377 {
5378   bitmap_iterator bi;
5379   struct iv_ca_delta *act_delta, *best_delta;
5380   unsigned i;
5381   comp_cost best_cost, acost;
5382   struct iv_cand *cand;
5383
5384   best_delta = NULL;
5385   best_cost = iv_ca_cost (ivs);
5386
5387   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5388     {
5389       cand = iv_cand (data, i);
5390
5391       if (cand == except_cand)
5392         continue;
5393
5394       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5395
5396       if (compare_costs (acost, best_cost) < 0)
5397         {
5398           best_cost = acost;
5399           iv_ca_delta_free (&best_delta);
5400           best_delta = act_delta;
5401         }
5402       else
5403         iv_ca_delta_free (&act_delta);
5404     }
5405
5406   if (!best_delta)
5407     {
5408       *delta = NULL;
5409       return best_cost;
5410     }
5411
5412   /* Recurse to possibly remove other unnecessary ivs.  */
5413   iv_ca_delta_commit (data, ivs, best_delta, true);
5414   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5415   iv_ca_delta_commit (data, ivs, best_delta, false);
5416   *delta = iv_ca_delta_join (best_delta, *delta);
5417   return best_cost;
5418 }
5419
5420 /* Tries to extend the sets IVS in the best possible way in order
5421    to express the USE.  If ORIGINALP is true, prefer candidates from
5422    the original set of IVs, otherwise favor important candidates not
5423    based on any memory object.  */
5424
5425 static bool
5426 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5427                   struct iv_use *use, bool originalp)
5428 {
5429   comp_cost best_cost, act_cost;
5430   unsigned i;
5431   bitmap_iterator bi;
5432   struct iv_cand *cand;
5433   struct iv_ca_delta *best_delta = NULL, *act_delta;
5434   struct cost_pair *cp;
5435
5436   iv_ca_add_use (data, ivs, use, false);
5437   best_cost = iv_ca_cost (ivs);
5438
5439   cp = iv_ca_cand_for_use (ivs, use);
5440   if (!cp)
5441     {
5442       ivs->upto--;
5443       ivs->bad_uses--;
5444       iv_ca_add_use (data, ivs, use, true);
5445       best_cost = iv_ca_cost (ivs);
5446       cp = iv_ca_cand_for_use (ivs, use);
5447     }
5448   if (cp)
5449     {
5450       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5451       iv_ca_set_no_cp (data, ivs, use);
5452     }
5453
5454   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5455      first try important candidates not based on any memory object.  Only if
5456      this fails, try the specific ones.  Rationale -- in loops with many
5457      variables the best choice often is to use just one generic biv.  If we
5458      added here many ivs specific to the uses, the optimization algorithm later
5459      would be likely to get stuck in a local minimum, thus causing us to create
5460      too many ivs.  The approach from few ivs to more seems more likely to be
5461      successful -- starting from few ivs, replacing an expensive use by a
5462      specific iv should always be a win.  */
5463   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5464     {
5465       cand = iv_cand (data, i);
5466
5467       if (originalp && cand->pos !=IP_ORIGINAL)
5468         continue;
5469
5470       if (!originalp && cand->iv->base_object != NULL_TREE)
5471         continue;
5472
5473       if (iv_ca_cand_used_p (ivs, cand))
5474         continue;
5475
5476       cp = get_use_iv_cost (data, use, cand);
5477       if (!cp)
5478         continue;
5479
5480       iv_ca_set_cp (data, ivs, use, cp);
5481       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5482                                true);
5483       iv_ca_set_no_cp (data, ivs, use);
5484       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5485
5486       if (compare_costs (act_cost, best_cost) < 0)
5487         {
5488           best_cost = act_cost;
5489
5490           iv_ca_delta_free (&best_delta);
5491           best_delta = act_delta;
5492         }
5493       else
5494         iv_ca_delta_free (&act_delta);
5495     }
5496
5497   if (infinite_cost_p (best_cost))
5498     {
5499       for (i = 0; i < use->n_map_members; i++)
5500         {
5501           cp = use->cost_map + i;
5502           cand = cp->cand;
5503           if (!cand)
5504             continue;
5505
5506           /* Already tried this.  */
5507           if (cand->important)
5508             {
5509               if (originalp && cand->pos == IP_ORIGINAL)
5510                 continue;
5511               if (!originalp && cand->iv->base_object == NULL_TREE)
5512                 continue;
5513             }
5514
5515           if (iv_ca_cand_used_p (ivs, cand))
5516             continue;
5517
5518           act_delta = NULL;
5519           iv_ca_set_cp (data, ivs, use, cp);
5520           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5521           iv_ca_set_no_cp (data, ivs, use);
5522           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5523                                        cp, act_delta);
5524
5525           if (compare_costs (act_cost, best_cost) < 0)
5526             {
5527               best_cost = act_cost;
5528
5529               if (best_delta)
5530                 iv_ca_delta_free (&best_delta);
5531               best_delta = act_delta;
5532             }
5533           else
5534             iv_ca_delta_free (&act_delta);
5535         }
5536     }
5537
5538   iv_ca_delta_commit (data, ivs, best_delta, true);
5539   iv_ca_delta_free (&best_delta);
5540
5541   return !infinite_cost_p (best_cost);
5542 }
5543
5544 /* Finds an initial assignment of candidates to uses.  */
5545
5546 static struct iv_ca *
5547 get_initial_solution (struct ivopts_data *data, bool originalp)
5548 {
5549   struct iv_ca *ivs = iv_ca_new (data);
5550   unsigned i;
5551
5552   for (i = 0; i < n_iv_uses (data); i++)
5553     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5554       {
5555         iv_ca_free (&ivs);
5556         return NULL;
5557       }
5558
5559   return ivs;
5560 }
5561
5562 /* Tries to improve set of induction variables IVS.  */
5563
5564 static bool
5565 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5566 {
5567   unsigned i, n_ivs;
5568   comp_cost acost, best_cost = iv_ca_cost (ivs);
5569   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5570   struct iv_cand *cand;
5571
5572   /* Try extending the set of induction variables by one.  */
5573   for (i = 0; i < n_iv_cands (data); i++)
5574     {
5575       cand = iv_cand (data, i);
5576
5577       if (iv_ca_cand_used_p (ivs, cand))
5578         continue;
5579
5580       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5581       if (!act_delta)
5582         continue;
5583
5584       /* If we successfully added the candidate and the set is small enough,
5585          try optimizing it by removing other candidates.  */
5586       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5587         {
5588           iv_ca_delta_commit (data, ivs, act_delta, true);
5589           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5590           iv_ca_delta_commit (data, ivs, act_delta, false);
5591           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5592         }
5593
5594       if (compare_costs (acost, best_cost) < 0)
5595         {
5596           best_cost = acost;
5597           iv_ca_delta_free (&best_delta);
5598           best_delta = act_delta;
5599         }
5600       else
5601         iv_ca_delta_free (&act_delta);
5602     }
5603
5604   if (!best_delta)
5605     {
5606       /* Try removing the candidates from the set instead.  */
5607       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5608
5609       /* Nothing more we can do.  */
5610       if (!best_delta)
5611         return false;
5612     }
5613
5614   iv_ca_delta_commit (data, ivs, best_delta, true);
5615   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5616   iv_ca_delta_free (&best_delta);
5617   return true;
5618 }
5619
5620 /* Attempts to find the optimal set of induction variables.  We do simple
5621    greedy heuristic -- we try to replace at most one candidate in the selected
5622    solution and remove the unused ivs while this improves the cost.  */
5623
5624 static struct iv_ca *
5625 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5626 {
5627   struct iv_ca *set;
5628
5629   /* Get the initial solution.  */
5630   set = get_initial_solution (data, originalp);
5631   if (!set)
5632     {
5633       if (dump_file && (dump_flags & TDF_DETAILS))
5634         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5635       return NULL;
5636     }
5637
5638   if (dump_file && (dump_flags & TDF_DETAILS))
5639     {
5640       fprintf (dump_file, "Initial set of candidates:\n");
5641       iv_ca_dump (data, dump_file, set);
5642     }
5643
5644   while (try_improve_iv_set (data, set))
5645     {
5646       if (dump_file && (dump_flags & TDF_DETAILS))
5647         {
5648           fprintf (dump_file, "Improved to:\n");
5649           iv_ca_dump (data, dump_file, set);
5650         }
5651     }
5652
5653   return set;
5654 }
5655
5656 static struct iv_ca *
5657 find_optimal_iv_set (struct ivopts_data *data)
5658 {
5659   unsigned i;
5660   struct iv_ca *set, *origset;
5661   struct iv_use *use;
5662   comp_cost cost, origcost;
5663
5664   /* Determine the cost based on a strategy that starts with original IVs,
5665      and try again using a strategy that prefers candidates not based
5666      on any IVs.  */
5667   origset = find_optimal_iv_set_1 (data, true);
5668   set = find_optimal_iv_set_1 (data, false);
5669
5670   if (!origset && !set)
5671     return NULL;
5672
5673   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5674   cost = set ? iv_ca_cost (set) : infinite_cost;
5675
5676   if (dump_file && (dump_flags & TDF_DETAILS))
5677     {
5678       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5679                origcost.cost, origcost.complexity);
5680       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5681                cost.cost, cost.complexity);
5682     }
5683
5684   /* Choose the one with the best cost.  */
5685   if (compare_costs (origcost, cost) <= 0)
5686     {
5687       if (set)
5688         iv_ca_free (&set);
5689       set = origset;
5690     }
5691   else if (origset)
5692     iv_ca_free (&origset);
5693
5694   for (i = 0; i < n_iv_uses (data); i++)
5695     {
5696       use = iv_use (data, i);
5697       use->selected = iv_ca_cand_for_use (set, use)->cand;
5698     }
5699
5700   return set;
5701 }
5702
5703 /* Creates a new induction variable corresponding to CAND.  */
5704
5705 static void
5706 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5707 {
5708   gimple_stmt_iterator incr_pos;
5709   tree base;
5710   bool after = false;
5711
5712   if (!cand->iv)
5713     return;
5714
5715   switch (cand->pos)
5716     {
5717     case IP_NORMAL:
5718       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5719       break;
5720
5721     case IP_END:
5722       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5723       after = true;
5724       break;
5725
5726     case IP_AFTER_USE:
5727       after = true;
5728       /* fall through */
5729     case IP_BEFORE_USE:
5730       incr_pos = gsi_for_stmt (cand->incremented_at);
5731       break;
5732
5733     case IP_ORIGINAL:
5734       /* Mark that the iv is preserved.  */
5735       name_info (data, cand->var_before)->preserve_biv = true;
5736       name_info (data, cand->var_after)->preserve_biv = true;
5737
5738       /* Rewrite the increment so that it uses var_before directly.  */
5739       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5740       return;
5741     }
5742
5743   gimple_add_tmp_var (cand->var_before);
5744   add_referenced_var (cand->var_before);
5745
5746   base = unshare_expr (cand->iv->base);
5747
5748   create_iv (base, unshare_expr (cand->iv->step),
5749              cand->var_before, data->current_loop,
5750              &incr_pos, after, &cand->var_before, &cand->var_after);
5751 }
5752
5753 /* Creates new induction variables described in SET.  */
5754
5755 static void
5756 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5757 {
5758   unsigned i;
5759   struct iv_cand *cand;
5760   bitmap_iterator bi;
5761
5762   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5763     {
5764       cand = iv_cand (data, i);
5765       create_new_iv (data, cand);
5766     }
5767
5768   if (dump_file && (dump_flags & TDF_DETAILS))
5769     {
5770       fprintf (dump_file, "\nSelected IV set: \n");
5771       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5772         {
5773           cand = iv_cand (data, i);
5774           dump_cand (dump_file, cand);
5775         }
5776       fprintf (dump_file, "\n");
5777     }
5778 }
5779
5780 /* Rewrites USE (definition of iv used in a nonlinear expression)
5781    using candidate CAND.  */
5782
5783 static void
5784 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5785                             struct iv_use *use, struct iv_cand *cand)
5786 {
5787   tree comp;
5788   tree op, tgt;
5789   gimple ass;
5790   gimple_stmt_iterator bsi;
5791
5792   /* An important special case -- if we are asked to express value of
5793      the original iv by itself, just exit; there is no need to
5794      introduce a new computation (that might also need casting the
5795      variable to unsigned and back).  */
5796   if (cand->pos == IP_ORIGINAL
5797       && cand->incremented_at == use->stmt)
5798     {
5799       tree step, ctype, utype;
5800       enum tree_code incr_code = PLUS_EXPR, old_code;
5801
5802       gcc_assert (is_gimple_assign (use->stmt));
5803       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5804
5805       step = cand->iv->step;
5806       ctype = TREE_TYPE (step);
5807       utype = TREE_TYPE (cand->var_after);
5808       if (TREE_CODE (step) == NEGATE_EXPR)
5809         {
5810           incr_code = MINUS_EXPR;
5811           step = TREE_OPERAND (step, 0);
5812         }
5813
5814       /* Check whether we may leave the computation unchanged.
5815          This is the case only if it does not rely on other
5816          computations in the loop -- otherwise, the computation
5817          we rely upon may be removed in remove_unused_ivs,
5818          thus leading to ICE.  */
5819       old_code = gimple_assign_rhs_code (use->stmt);
5820       if (old_code == PLUS_EXPR
5821           || old_code == MINUS_EXPR
5822           || old_code == POINTER_PLUS_EXPR)
5823         {
5824           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5825             op = gimple_assign_rhs2 (use->stmt);
5826           else if (old_code != MINUS_EXPR
5827                    && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5828             op = gimple_assign_rhs1 (use->stmt);
5829           else
5830             op = NULL_TREE;
5831         }
5832       else
5833         op = NULL_TREE;
5834
5835       if (op
5836           && (TREE_CODE (op) == INTEGER_CST
5837               || operand_equal_p (op, step, 0)))
5838         return;
5839
5840       /* Otherwise, add the necessary computations to express
5841          the iv.  */
5842       op = fold_convert (ctype, cand->var_before);
5843       comp = fold_convert (utype,
5844                            build2 (incr_code, ctype, op,
5845                                    unshare_expr (step)));
5846     }
5847   else
5848     {
5849       comp = get_computation (data->current_loop, use, cand);
5850       gcc_assert (comp != NULL_TREE);
5851     }
5852
5853   switch (gimple_code (use->stmt))
5854     {
5855     case GIMPLE_PHI:
5856       tgt = PHI_RESULT (use->stmt);
5857
5858       /* If we should keep the biv, do not replace it.  */
5859       if (name_info (data, tgt)->preserve_biv)
5860         return;
5861
5862       bsi = gsi_after_labels (gimple_bb (use->stmt));
5863       break;
5864
5865     case GIMPLE_ASSIGN:
5866       tgt = gimple_assign_lhs (use->stmt);
5867       bsi = gsi_for_stmt (use->stmt);
5868       break;
5869
5870     default:
5871       gcc_unreachable ();
5872     }
5873
5874   if (!valid_gimple_rhs_p (comp)
5875       || (gimple_code (use->stmt) != GIMPLE_PHI
5876           /* We can't allow re-allocating the stmt as it might be pointed
5877              to still.  */
5878           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5879               >= gimple_num_ops (gsi_stmt (bsi)))))
5880     {
5881       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5882                                        true, GSI_SAME_STMT);
5883       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5884         {
5885           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5886           /* As this isn't a plain copy we have to reset alignment
5887              information.  */
5888           if (SSA_NAME_PTR_INFO (comp))
5889             {
5890               SSA_NAME_PTR_INFO (comp)->align = 1;
5891               SSA_NAME_PTR_INFO (comp)->misalign = 0;
5892             }
5893         }
5894     }
5895
5896   if (gimple_code (use->stmt) == GIMPLE_PHI)
5897     {
5898       ass = gimple_build_assign (tgt, comp);
5899       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5900
5901       bsi = gsi_for_stmt (use->stmt);
5902       remove_phi_node (&bsi, false);
5903     }
5904   else
5905     {
5906       gimple_assign_set_rhs_from_tree (&bsi, comp);
5907       use->stmt = gsi_stmt (bsi);
5908     }
5909 }
5910
5911 /* Copies the reference information from OLD_REF to NEW_REF.  */
5912
5913 static void
5914 copy_ref_info (tree new_ref, tree old_ref)
5915 {
5916   tree new_ptr_base = NULL_TREE;
5917
5918   TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5919   TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5920
5921   new_ptr_base = TREE_OPERAND (new_ref, 0);
5922
5923   /* We can transfer points-to information from an old pointer
5924      or decl base to the new one.  */
5925   if (new_ptr_base
5926       && TREE_CODE (new_ptr_base) == SSA_NAME
5927       && !SSA_NAME_PTR_INFO (new_ptr_base))
5928     {
5929       tree base = get_base_address (old_ref);
5930       if (!base)
5931         ;
5932       else if ((TREE_CODE (base) == MEM_REF
5933                 || TREE_CODE (base) == TARGET_MEM_REF)
5934                && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
5935                && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
5936         {
5937           struct ptr_info_def *new_pi;
5938           duplicate_ssa_name_ptr_info
5939             (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5940           new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
5941           /* We have to be careful about transfering alignment information.  */
5942           if (TREE_CODE (old_ref) == MEM_REF
5943               && !(TREE_CODE (new_ref) == TARGET_MEM_REF
5944                    && (TMR_INDEX2 (new_ref)
5945                        || (TMR_STEP (new_ref)
5946                            && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
5947                                < new_pi->align)))))
5948             {
5949               new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
5950                                                   mem_ref_offset (new_ref)).low;
5951               new_pi->misalign &= (new_pi->align - 1);
5952             }
5953           else
5954             {
5955               new_pi->align = 1;
5956               new_pi->misalign = 0;
5957             }
5958         }
5959       else if (TREE_CODE (base) == VAR_DECL
5960                || TREE_CODE (base) == PARM_DECL
5961                || TREE_CODE (base) == RESULT_DECL)
5962         {
5963           struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5964           pt_solution_set_var (&pi->pt, base);
5965         }
5966     }
5967 }
5968
5969 /* Performs a peephole optimization to reorder the iv update statement with
5970    a mem ref to enable instruction combining in later phases. The mem ref uses
5971    the iv value before the update, so the reordering transformation requires
5972    adjustment of the offset. CAND is the selected IV_CAND.
5973
5974    Example:
5975
5976    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
5977    iv2 = iv1 + 1;
5978
5979    if (t < val)      (1)
5980      goto L;
5981    goto Head;
5982
5983
5984    directly propagating t over to (1) will introduce overlapping live range
5985    thus increase register pressure. This peephole transform it into:
5986
5987
5988    iv2 = iv1 + 1;
5989    t = MEM_REF (base, iv2, 8, 8);
5990    if (t < val)
5991      goto L;
5992    goto Head;
5993 */
5994
5995 static void
5996 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
5997 {
5998   tree var_after;
5999   gimple iv_update, stmt;
6000   basic_block bb;
6001   gimple_stmt_iterator gsi, gsi_iv;
6002
6003   if (cand->pos != IP_NORMAL)
6004     return;
6005
6006   var_after = cand->var_after;
6007   iv_update = SSA_NAME_DEF_STMT (var_after);
6008
6009   bb = gimple_bb (iv_update);
6010   gsi = gsi_last_nondebug_bb (bb);
6011   stmt = gsi_stmt (gsi);
6012
6013   /* Only handle conditional statement for now.  */
6014   if (gimple_code (stmt) != GIMPLE_COND)
6015     return;
6016
6017   gsi_prev_nondebug (&gsi);
6018   stmt = gsi_stmt (gsi);
6019   if (stmt != iv_update)
6020     return;
6021
6022   gsi_prev_nondebug (&gsi);
6023   if (gsi_end_p (gsi))
6024     return;
6025
6026   stmt = gsi_stmt (gsi);
6027   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6028     return;
6029
6030   if (stmt != use->stmt)
6031     return;
6032
6033   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6034     return;
6035
6036   if (dump_file && (dump_flags & TDF_DETAILS))
6037     {
6038       fprintf (dump_file, "Reordering \n");
6039       print_gimple_stmt (dump_file, iv_update, 0, 0);
6040       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6041       fprintf (dump_file, "\n");
6042     }
6043
6044   gsi = gsi_for_stmt (use->stmt);
6045   gsi_iv = gsi_for_stmt (iv_update);
6046   gsi_move_before (&gsi_iv, &gsi);
6047
6048   cand->pos = IP_BEFORE_USE;
6049   cand->incremented_at = use->stmt;
6050 }
6051
6052 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6053
6054 static void
6055 rewrite_use_address (struct ivopts_data *data,
6056                      struct iv_use *use, struct iv_cand *cand)
6057 {
6058   aff_tree aff;
6059   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6060   tree base_hint = NULL_TREE;
6061   tree ref, iv;
6062   bool ok;
6063
6064   adjust_iv_update_pos (cand, use);
6065   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6066   gcc_assert (ok);
6067   unshare_aff_combination (&aff);
6068
6069   /* To avoid undefined overflow problems, all IV candidates use unsigned
6070      integer types.  The drawback is that this makes it impossible for
6071      create_mem_ref to distinguish an IV that is based on a memory object
6072      from one that represents simply an offset.
6073
6074      To work around this problem, we pass a hint to create_mem_ref that
6075      indicates which variable (if any) in aff is an IV based on a memory
6076      object.  Note that we only consider the candidate.  If this is not
6077      based on an object, the base of the reference is in some subexpression
6078      of the use -- but these will use pointer types, so they are recognized
6079      by the create_mem_ref heuristics anyway.  */
6080   if (cand->iv->base_object)
6081     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6082
6083   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6084   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6085                         reference_alias_ptr_type (*use->op_p),
6086                         iv, base_hint, data->speed);
6087   copy_ref_info (ref, *use->op_p);
6088   *use->op_p = ref;
6089 }
6090
6091 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6092    candidate CAND.  */
6093
6094 static void
6095 rewrite_use_compare (struct ivopts_data *data,
6096                      struct iv_use *use, struct iv_cand *cand)
6097 {
6098   tree comp, *var_p, op, bound;
6099   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6100   enum tree_code compare;
6101   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6102   bool ok;
6103
6104   bound = cp->value;
6105   if (bound)
6106     {
6107       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6108       tree var_type = TREE_TYPE (var);
6109       gimple_seq stmts;
6110
6111       if (dump_file && (dump_flags & TDF_DETAILS))
6112         {
6113           fprintf (dump_file, "Replacing exit test: ");
6114           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6115         }
6116       compare = iv_elimination_compare (data, use);
6117       bound = unshare_expr (fold_convert (var_type, bound));
6118       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6119       if (stmts)
6120         gsi_insert_seq_on_edge_immediate (
6121                 loop_preheader_edge (data->current_loop),
6122                 stmts);
6123
6124       gimple_cond_set_lhs (use->stmt, var);
6125       gimple_cond_set_code (use->stmt, compare);
6126       gimple_cond_set_rhs (use->stmt, op);
6127       return;
6128     }
6129
6130   /* The induction variable elimination failed; just express the original
6131      giv.  */
6132   comp = get_computation (data->current_loop, use, cand);
6133   gcc_assert (comp != NULL_TREE);
6134
6135   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6136   gcc_assert (ok);
6137
6138   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6139                                      true, GSI_SAME_STMT);
6140 }
6141
6142 /* Rewrites USE using candidate CAND.  */
6143
6144 static void
6145 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6146 {
6147   switch (use->type)
6148     {
6149       case USE_NONLINEAR_EXPR:
6150         rewrite_use_nonlinear_expr (data, use, cand);
6151         break;
6152
6153       case USE_ADDRESS:
6154         rewrite_use_address (data, use, cand);
6155         break;
6156
6157       case USE_COMPARE:
6158         rewrite_use_compare (data, use, cand);
6159         break;
6160
6161       default:
6162         gcc_unreachable ();
6163     }
6164
6165   update_stmt (use->stmt);
6166 }
6167
6168 /* Rewrite the uses using the selected induction variables.  */
6169
6170 static void
6171 rewrite_uses (struct ivopts_data *data)
6172 {
6173   unsigned i;
6174   struct iv_cand *cand;
6175   struct iv_use *use;
6176
6177   for (i = 0; i < n_iv_uses (data); i++)
6178     {
6179       use = iv_use (data, i);
6180       cand = use->selected;
6181       gcc_assert (cand);
6182
6183       rewrite_use (data, use, cand);
6184     }
6185 }
6186
6187 /* Removes the ivs that are not used after rewriting.  */
6188
6189 static void
6190 remove_unused_ivs (struct ivopts_data *data)
6191 {
6192   unsigned j;
6193   bitmap_iterator bi;
6194   bitmap toremove = BITMAP_ALLOC (NULL);
6195
6196   /* Figure out an order in which to release SSA DEFs so that we don't
6197      release something that we'd have to propagate into a debug stmt
6198      afterwards.  */
6199   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6200     {
6201       struct version_info *info;
6202
6203       info = ver_info (data, j);
6204       if (info->iv
6205           && !integer_zerop (info->iv->step)
6206           && !info->inv_id
6207           && !info->iv->have_use_for
6208           && !info->preserve_biv)
6209         bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6210     }
6211
6212   release_defs_bitset (toremove);
6213
6214   BITMAP_FREE (toremove);
6215 }
6216
6217 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6218    for pointer_map_traverse.  */
6219
6220 static bool
6221 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6222                       void *data ATTRIBUTE_UNUSED)
6223 {
6224   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6225
6226   free (niter);
6227   return true;
6228 }
6229
6230 /* Frees data allocated by the optimization of a single loop.  */
6231
6232 static void
6233 free_loop_data (struct ivopts_data *data)
6234 {
6235   unsigned i, j;
6236   bitmap_iterator bi;
6237   tree obj;
6238
6239   if (data->niters)
6240     {
6241       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6242       pointer_map_destroy (data->niters);
6243       data->niters = NULL;
6244     }
6245
6246   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6247     {
6248       struct version_info *info;
6249
6250       info = ver_info (data, i);
6251       if (info->iv)
6252         free (info->iv);
6253       info->iv = NULL;
6254       info->has_nonlin_use = false;
6255       info->preserve_biv = false;
6256       info->inv_id = 0;
6257     }
6258   bitmap_clear (data->relevant);
6259   bitmap_clear (data->important_candidates);
6260
6261   for (i = 0; i < n_iv_uses (data); i++)
6262     {
6263       struct iv_use *use = iv_use (data, i);
6264
6265       free (use->iv);
6266       BITMAP_FREE (use->related_cands);
6267       for (j = 0; j < use->n_map_members; j++)
6268         if (use->cost_map[j].depends_on)
6269           BITMAP_FREE (use->cost_map[j].depends_on);
6270       free (use->cost_map);
6271       free (use);
6272     }
6273   VEC_truncate (iv_use_p, data->iv_uses, 0);
6274
6275   for (i = 0; i < n_iv_cands (data); i++)
6276     {
6277       struct iv_cand *cand = iv_cand (data, i);
6278
6279       if (cand->iv)
6280         free (cand->iv);
6281       if (cand->depends_on)
6282         BITMAP_FREE (cand->depends_on);
6283       free (cand);
6284     }
6285   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6286
6287   if (data->version_info_size < num_ssa_names)
6288     {
6289       data->version_info_size = 2 * num_ssa_names;
6290       free (data->version_info);
6291       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6292     }
6293
6294   data->max_inv_id = 0;
6295
6296   FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6297     SET_DECL_RTL (obj, NULL_RTX);
6298
6299   VEC_truncate (tree, decl_rtl_to_reset, 0);
6300
6301   htab_empty (data->inv_expr_tab);
6302   data->inv_expr_id = 0;
6303 }
6304
6305 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6306    loop tree.  */
6307
6308 static void
6309 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6310 {
6311   free_loop_data (data);
6312   free (data->version_info);
6313   BITMAP_FREE (data->relevant);
6314   BITMAP_FREE (data->important_candidates);
6315
6316   VEC_free (tree, heap, decl_rtl_to_reset);
6317   VEC_free (iv_use_p, heap, data->iv_uses);
6318   VEC_free (iv_cand_p, heap, data->iv_candidates);
6319   htab_delete (data->inv_expr_tab);
6320 }
6321
6322 /* Returns true if the loop body BODY includes any function calls.  */
6323
6324 static bool
6325 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6326 {
6327   gimple_stmt_iterator gsi;
6328   unsigned i;
6329
6330   for (i = 0; i < num_nodes; i++)
6331     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6332       {
6333         gimple stmt = gsi_stmt (gsi);
6334         if (is_gimple_call (stmt)
6335             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6336           return true;
6337       }
6338   return false;
6339 }
6340
6341 /* Optimizes the LOOP.  Returns true if anything changed.  */
6342
6343 static bool
6344 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6345 {
6346   bool changed = false;
6347   struct iv_ca *iv_ca;
6348   edge exit;
6349   basic_block *body;
6350
6351   gcc_assert (!data->niters);
6352   data->current_loop = loop;
6353   data->speed = optimize_loop_for_speed_p (loop);
6354
6355   if (dump_file && (dump_flags & TDF_DETAILS))
6356     {
6357       fprintf (dump_file, "Processing loop %d\n", loop->num);
6358
6359       exit = single_dom_exit (loop);
6360       if (exit)
6361         {
6362           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6363                    exit->src->index, exit->dest->index);
6364           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6365           fprintf (dump_file, "\n");
6366         }
6367
6368       fprintf (dump_file, "\n");
6369     }
6370
6371   body = get_loop_body (loop);
6372   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6373   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6374   free (body);
6375
6376   /* For each ssa name determines whether it behaves as an induction variable
6377      in some loop.  */
6378   if (!find_induction_variables (data))
6379     goto finish;
6380
6381   /* Finds interesting uses (item 1).  */
6382   find_interesting_uses (data);
6383   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6384     goto finish;
6385
6386   /* Finds candidates for the induction variables (item 2).  */
6387   find_iv_candidates (data);
6388
6389   /* Calculates the costs (item 3, part 1).  */
6390   determine_iv_costs (data);
6391   determine_use_iv_costs (data);
6392   determine_set_costs (data);
6393
6394   /* Find the optimal set of induction variables (item 3, part 2).  */
6395   iv_ca = find_optimal_iv_set (data);
6396   if (!iv_ca)
6397     goto finish;
6398   changed = true;
6399
6400   /* Create the new induction variables (item 4, part 1).  */
6401   create_new_ivs (data, iv_ca);
6402   iv_ca_free (&iv_ca);
6403
6404   /* Rewrite the uses (item 4, part 2).  */
6405   rewrite_uses (data);
6406
6407   /* Remove the ivs that are unused after rewriting.  */
6408   remove_unused_ivs (data);
6409
6410   /* We have changed the structure of induction variables; it might happen
6411      that definitions in the scev database refer to some of them that were
6412      eliminated.  */
6413   scev_reset ();
6414
6415 finish:
6416   free_loop_data (data);
6417
6418   return changed;
6419 }
6420
6421 /* Main entry point.  Optimizes induction variables in loops.  */
6422
6423 void
6424 tree_ssa_iv_optimize (void)
6425 {
6426   struct loop *loop;
6427   struct ivopts_data data;
6428   loop_iterator li;
6429
6430   tree_ssa_iv_optimize_init (&data);
6431
6432   /* Optimize the loops starting with the innermost ones.  */
6433   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6434     {
6435       if (dump_file && (dump_flags & TDF_DETAILS))
6436         flow_loop_dump (loop, dump_file, NULL, 1);
6437
6438       tree_ssa_iv_optimize_loop (&data, loop);
6439     }
6440
6441   tree_ssa_iv_optimize_finalize (&data);
6442 }