2010-07-14 Richard Guenther <rguenther@suse.de>

[pf3gnuchains/gcc-fork.git] / gcc / tree-ssa-loop-ivopts.c
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c

index 82e45d2..83ec13e 100644 (file)
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -1,19 +1,19 @@
  /* Induction variable optimizations.
-   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
+   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
     Free Software Foundation, Inc.
-   
+
  This file is part of GCC.
-   
+
  GCC is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation; either version 3, or (at your option) any
  later version.
-   
+
  GCC is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  for more details.
-   
+
  You should have received a copy of the GNU General Public License
  along with GCC; see the file COPYING3.  If not see
  <http://www.gnu.org/licenses/>.  */
@@ -56,7 +56,7 @@ along with GCC; see the file COPYING3.  If not see
  
     4) The trees are transformed to use the new variables, the dead code is
        removed.
-   
+
     All of this is done loop by loop.  Doing it globally is theoretically
     possible, it might give a better performance and it might enable us
     to decide costs more precisely, but getting all the interactions right
@@ -67,18 +67,15 @@ along with GCC; see the file COPYING3.  If not see
  #include "coretypes.h"
  #include "tm.h"
  #include "tree.h"
-#include "rtl.h"
  #include "tm_p.h"
-#include "hard-reg-set.h"
  #include "basic-block.h"
  #include "output.h"
-#include "diagnostic.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
  #include "tree-flow.h"
  #include "tree-dump.h"
  #include "timevar.h"
  #include "cfgloop.h"
-#include "varray.h"
-#include "expr.h"
  #include "tree-pass.h"
  #include "ggc.h"
  #include "insn-config.h"
@@ -92,6 +89,12 @@ along with GCC; see the file COPYING3.  If not see
  #include "langhooks.h"
  #include "tree-affine.h"
  #include "target.h"
+#include "tree-ssa-propagate.h"
+
+/* FIXME: Expressions are expanded to RTL in this pass to determine the
+   cost of different addressing modes.  This should be moved to a TBD
+   interface between the GIMPLE and RTL worlds.  */
+#include "expr.h"
  
  /* The infinite cost.  */
  #define INFTY 10000000
@@ -120,8 +123,8 @@ struct version_info
    struct iv *iv;       /* Induction variable description.  */
    bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
                            an expression that is not an induction variable.  */
-  unsigned inv_id;     /* Id of an invariant.  */
    bool preserve_biv;   /* For the original biv, whether to preserve it.  */
+  unsigned inv_id;     /* Id of an invariant.  */
  };
  
  /* Types of uses.  */
@@ -192,7 +195,7 @@ struct iv_cand
    unsigned id;         /* The number of the candidate.  */
    bool important;      /* Whether this is an "important" candidate, i.e. such
                            that it should be considered by all uses.  */
-  enum iv_position pos;        /* Where it is computed.  */
+  ENUM_BITFIELD(iv_position) pos : 8;  /* Where it is computed.  */
    gimple incremented_at;/* For original biv, the statement where it is
                            incremented.  */
    tree var_before;     /* The variable used for it before increment.  */
@@ -257,6 +260,9 @@ struct ivopts_data
  
    /* Are we optimizing for speed?  */
    bool speed;
+
+  /* Whether the loop body includes any function calls.  */
+  bool body_includes_call;
  };
  
  /* An assignment of iv candidates to uses.  */
@@ -682,6 +688,11 @@ contains_abnormal_ssa_name_p (tree expr)
                             idx_contains_abnormal_ssa_name_p,
                             NULL);
  
+  if (code == COND_EXPR)
+    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
+      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
+      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
+
    switch (codeclass)
      {
      case tcc_binary:
@@ -806,7 +817,7 @@ determine_base_object (tree expr)
        if (!base)
         return expr;
  
-      if (TREE_CODE (base) == INDIRECT_REF)
+      if (TREE_CODE (base) == MEM_REF)
         return determine_base_object (TREE_OPERAND (base, 0));
  
        return fold_convert (ptr_type_node,
@@ -1076,7 +1087,7 @@ find_induction_variables (struct ivopts_data *data)
           print_generic_expr (dump_file, niter, TDF_SLIM);
           fprintf (dump_file, "\n\n");
         };
- 
+
        fprintf (dump_file, "Induction variables:\n\n");
  
        EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
@@ -1159,7 +1170,7 @@ find_interesting_uses_op (struct ivopts_data *data, tree op)
    iv = get_iv (data, op);
    if (!iv)
      return NULL;
-  
+
    if (iv->have_use_for)
      {
        use = iv_use (data, iv->use_id);
@@ -1352,8 +1363,7 @@ idx_find_step (tree base, tree *idx, void *data)
    tree step, iv_base, iv_step, lbound, off;
    struct loop *loop = dta->ivopts_data->current_loop;
  
-  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
-      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
+  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF)
      return false;
  
    /* If base is a component ref, require that the offset of the reference
@@ -1537,16 +1547,18 @@ may_be_unaligned_p (tree ref, tree step)
  
    if (mode != BLKmode)
      {
-      double_int mul;
-      tree al = build_int_cst (TREE_TYPE (step),
-                              GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
+      unsigned mode_align = GET_MODE_ALIGNMENT (mode);
+
+      if (base_align < mode_align
+         || (bitpos % mode_align) != 0
+         || (bitpos % BITS_PER_UNIT) != 0)
+       return true;
  
-      if (base_align < GET_MODE_ALIGNMENT (mode)
-         || bitpos % GET_MODE_ALIGNMENT (mode) != 0
-         || bitpos % BITS_PER_UNIT != 0)
+      if (toffset
+         && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
         return true;
-    
-      if (!constant_multiple_of (step, al, &mul))
+
+      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
         return true;
      }
  
@@ -1663,7 +1675,6 @@ find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p
         goto fail;
        step = ifs_ivopts_data.step;
  
-      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
        gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
  
        /* Check that the base expression is addressable.  This needs
@@ -1685,8 +1696,14 @@ find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p
           tree *ref = &TREE_OPERAND (base, 0);
           while (handled_component_p (*ref))
             ref = &TREE_OPERAND (*ref, 0);
-         if (TREE_CODE (*ref) == INDIRECT_REF)
-           *ref = fold_indirect_ref (*ref);
+         if (TREE_CODE (*ref) == MEM_REF)
+           {
+             tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
+                                     TREE_OPERAND (*ref, 0),
+                                     TREE_OPERAND (*ref, 1));
+             if (tem)
+               *ref = tem;
+           }
         }
      }
  
@@ -2005,7 +2022,8 @@ strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
        expr = build_fold_addr_expr (op0);
        return fold_convert (orig_type, expr);
  
-    case INDIRECT_REF:
+    case MEM_REF:
+      /* ???  Offset operand?  */
        inside_addr = false;
        break;
  
@@ -2097,7 +2115,7 @@ add_candidate_1 (struct ivopts_data *data,
    unsigned i;
    struct iv_cand *cand = NULL;
    tree type, orig_type;
-  
+
    if (base)
      {
        orig_type = TREE_TYPE (base);
@@ -2267,7 +2285,7 @@ add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
     it.  The candidate computation is scheduled on all available positions.  */
  
  static void
-add_candidate (struct ivopts_data *data, 
+add_candidate (struct ivopts_data *data,
                tree base, tree step, bool important, struct iv_use *use)
  {
    if (ip_normal_pos (data->current_loop))
@@ -2604,7 +2622,7 @@ get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
  
        return ret;
      }
-      
+
    /* n_map_members is a power of two, so this computes modulo.  */
    s = cand->id & (use->n_map_members - 1);
    for (i = s; i < use->n_map_members; i++)
@@ -2645,7 +2663,7 @@ produce_memory_decl_rtl (tree obj, int *regno)
    addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
    enum machine_mode address_mode = targetm.addr_space.address_mode (as);
    rtx x;
-  
+
    gcc_assert (obj);
    if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
      {
@@ -2734,9 +2752,10 @@ computation_cost (tree expr, bool speed)
    unsigned cost;
    /* Avoid using hard regs in ways which may be unsupported.  */
    int regno = LAST_VIRTUAL_REGISTER + 1;
-  enum function_frequency real_frequency = cfun->function_frequency;
+  struct cgraph_node *node = cgraph_node (current_function_decl);
+  enum node_frequency real_frequency = node->frequency;
  
-  cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
+  node->frequency = NODE_FREQUENCY_NORMAL;
    crtl->maybe_hot_insn_p = speed;
    walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
    start_sequence ();
@@ -2744,7 +2763,7 @@ computation_cost (tree expr, bool speed)
    seq = get_insns ();
    end_sequence ();
    default_rtl_profile ();
-  cfun->function_frequency = real_frequency;
+  node->frequency = real_frequency;
  
    cost = seq_cost (seq, speed);
    if (MEM_P (rslt))
@@ -2876,7 +2895,7 @@ get_computation_aff (struct loop *loop,
    if (stmt_after_increment (loop, cand, at))
      {
        aff_tree cstep_aff;
-  
+
        if (common_type != uutype)
         cstep_common = fold_convert (common_type, cstep);
        else
@@ -2922,6 +2941,20 @@ get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
    return get_computation_at (loop, use, cand, use->stmt);
  }
  
+/* Adjust the cost COST for being in loop setup rather than loop body.
+   If we're optimizing for space, the loop setup overhead is constant;
+   if we're optimizing for speed, amortize it over the per-iteration cost.  */
+static unsigned
+adjust_setup_cost (struct ivopts_data *data, unsigned cost)
+{
+  if (cost == INFTY)
+    return cost;
+  else if (optimize_loop_for_speed_p (data->current_loop))
+    return cost / AVG_LOOP_NITER (data->current_loop);
+  else
+    return cost;
+}
+
  /* Returns cost of addition in MODE.  */
  
  static unsigned
@@ -2947,7 +2980,7 @@ add_cost (enum machine_mode mode, bool speed)
      cost = 1;
  
    costs[mode] = cost;
-      
+
    if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "Addition in %s costs %d\n",
              GET_MODE_NAME (mode), cost);
@@ -3012,7 +3045,7 @@ multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
                gen_int_mode (cst, mode), NULL_RTX, 0);
    seq = get_insns ();
    end_sequence ();
-  
+
    cost = seq_cost (seq, speed);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
@@ -3241,7 +3274,7 @@ get_address_cost (bool symbol_present, bool var_present,
             base = gen_int_mode (off, address_mode);
           else
             base = NULL_RTX;
-    
+
           if (base)
             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
  
@@ -3293,7 +3326,7 @@ get_address_cost (bool symbol_present, bool var_present,
        if (dump_file && (dump_flags & TDF_DETAILS))
         {
           fprintf (dump_file, "Address costs:\n");
-      
+
           for (i = 0; i < 16; i++)
             {
               sym_p = i & 1;
@@ -3498,7 +3531,7 @@ force_expr_to_var_cost (tree expr, bool speed)
      case MULT_EXPR:
        if (cst_and_fits_in_hwi (op0))
         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
-      else if (cst_and_fits_in_hwi (op1))                                  
+      else if (cst_and_fits_in_hwi (op1))
         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
        else
         return new_cost (target_spill_cost [speed], 0);
@@ -3553,7 +3586,7 @@ split_address_cost (struct ivopts_data *data,
    tree toffset;
    enum machine_mode mode;
    int unsignedp, volatilep;
-  
+
    core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                               &unsignedp, &volatilep, false);
  
@@ -3576,7 +3609,7 @@ split_address_cost (struct ivopts_data *data,
        *var_present = false;
        return zero_cost;
      }
-      
+
    *symbol_present = false;
    *var_present = true;
    return zero_cost;
@@ -3750,7 +3783,7 @@ get_computation_cost_at (struct ivopts_data *data,
  
    if (!constant_multiple_of (ustep, cstep, &rat))
      return infinite_cost;
-    
+
    if (double_int_fits_in_shwi_p (rat))
      ratio = double_int_to_shwi (rat);
    else
@@ -3761,14 +3794,14 @@ get_computation_cost_at (struct ivopts_data *data,
  
    /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
       or ratio == 1, it is better to handle this like
-     
+
       ubase - ratio * cbase + ratio * var
-     
+
       (also holds in the case ratio == -1, TODO.  */
  
    if (cst_and_fits_in_hwi (cbase))
      {
-      offset = - ratio * int_cst_value (cbase); 
+      offset = - ratio * int_cst_value (cbase);
        cost = difference_cost (data,
                               ubase, build_int_cst (utype, 0),
                               &symbol_present, &var_present, &offset,
@@ -3834,8 +3867,8 @@ get_computation_cost_at (struct ivopts_data *data,
    /* Symbol + offset should be compile-time computable so consider that they
        are added once to the variable, if present.  */
    if (var_present && (symbol_present || offset))
-    cost.cost += add_cost (TYPE_MODE (ctype), speed)
-                / AVG_LOOP_NITER (data->current_loop);
+    cost.cost += adjust_setup_cost (data,
+                                   add_cost (TYPE_MODE (ctype), speed));
  
    /* Having offset does not affect runtime cost in case it is added to
       symbol, but it increases complexity.  */
@@ -3847,6 +3880,7 @@ get_computation_cost_at (struct ivopts_data *data,
    aratio = ratio > 0 ? ratio : -ratio;
    if (aratio != 1)
      cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
+  return cost;
  
  fallback:
    if (can_autoinc)
@@ -3860,7 +3894,7 @@ fallback:
        return infinite_cost;
  
      if (address_p)
-      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
+      comp = build_simple_mem_ref (comp);
  
      return new_cost (computation_cost (comp, speed), 0);
    }
@@ -4085,6 +4119,7 @@ determine_use_iv_cost_condition (struct ivopts_data *data,
    bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
    comp_cost elim_cost, express_cost, cost;
    bool ok;
+  tree *control_var, *bound_cst;
  
    /* Only consider real candidates.  */
    if (!cand->iv)
@@ -4099,16 +4134,29 @@ determine_use_iv_cost_condition (struct ivopts_data *data,
        elim_cost = force_var_cost (data, bound, &depends_on_elim);
        /* The bound is a loop invariant, so it will be only computed
          once.  */
-      elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
+      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
      }
    else
      elim_cost = infinite_cost;
  
    /* Try expressing the original giv.  If it is compared with an invariant,
       note that we cannot get rid of it.  */
-  ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
+  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
+                             NULL, &cmp_iv);
    gcc_assert (ok);
  
+  /* When the condition is a comparison of the candidate IV against
+     zero, prefer this IV.
+
+     TODO: The constant that we're substracting from the cost should
+     be target-dependent.  This information should be added to the
+     target costs for each backend.  */
+  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
+      && integer_zerop (*bound_cst)
+      && (operand_equal_p (*control_var, cand->var_after, 0)
+         || operand_equal_p (*control_var, cand->var_before, 0)))
+    elim_cost.cost -= 1;
+
    express_cost = get_computation_cost (data, use, cand, false,
                                        &depends_on_express, NULL);
    fd_ivopts_data = data;
@@ -4333,7 +4381,7 @@ determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
    cost_base = force_var_cost (data, base, NULL);
    cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
  
-  cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
+  cost = cost_step + adjust_setup_cost (data, cost_base.cost);
  
    /* Prefer the original ivs unless we may gain something by replacing it.
       The reason is to make debugging simpler; so this is not relevant for
@@ -4341,7 +4389,7 @@ determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
    if (cand->pos != IP_ORIGINAL
        || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
      cost++;
-  
+
    /* Prefer not to insert statements into latch unless there are some
       already (so that we do not create unnecessary jumps).  */
    if (cand->pos == IP_END
@@ -4386,7 +4434,8 @@ ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
  {
    /* We add size to the cost, so that we prefer eliminating ivs
       if possible.  */
-  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
+  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
+                                           data->body_includes_call);
  }
  
  /* For each size of the induction variable set determine the penalty.  */
@@ -4401,30 +4450,11 @@ determine_set_costs (struct ivopts_data *data)
    struct loop *loop = data->current_loop;
    bitmap_iterator bi;
  
-  /* We use the following model (definitely improvable, especially the
-     cost function -- TODO):
-
-     We estimate the number of registers available (using MD data), name it A.
-
-     We estimate the number of registers used by the loop, name it U.  This
-     number is obtained as the number of loop phi nodes (not counting virtual
-     registers and bivs) + the number of variables from outside of the loop.
-
-     We set a reserve R (free regs that are used for temporary computations,
-     etc.).  For now the reserve is a constant 3.
-
-     Let I be the number of induction variables.
-     
-     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
-       make a lot of ivs without a reason).
-     -- if A - R < U + I <= A, the cost is I * PRES_COST
-     -- if U + I > A, the cost is I * PRES_COST and
-        number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
-
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
        fprintf (dump_file, "Global costs:\n");
        fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
+      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
        fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
        fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
      }
@@ -4893,7 +4923,7 @@ iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
  
        if (!iv_ca_has_deps (ivs, new_cp))
         continue;
-      
+
        if (!cheaper_cost_pair (new_cp, old_cp))
         continue;
  
@@ -4948,7 +4978,7 @@ iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
                 continue;
               if (!iv_ca_has_deps (ivs, cp))
                 continue;
-      
+
               if (!cheaper_cost_pair (cp, new_cp))
                 continue;
  
@@ -4969,7 +4999,7 @@ iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
                 continue;
               if (!iv_ca_has_deps (ivs, cp))
                 continue;
-      
+
               if (!cheaper_cost_pair (cp, new_cp))
                 continue;
  
@@ -5044,11 +5074,13 @@ iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
  }
  
  /* Tries to extend the sets IVS in the best possible way in order
-   to express the USE.  */
+   to express the USE.  If ORIGINALP is true, prefer candidates from
+   the original set of IVs, otherwise favor important candidates not
+   based on any memory object.  */
  
  static bool
  try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
-                 struct iv_use *use)
+                 struct iv_use *use, bool originalp)
  {
    comp_cost best_cost, act_cost;
    unsigned i;
@@ -5067,7 +5099,8 @@ try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
        iv_ca_set_no_cp (data, ivs, use);
      }
  
-  /* First try important candidates not based on any memory object.  Only if
+  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
+     first try important candidates not based on any memory object.  Only if
       this fails, try the specific ones.  Rationale -- in loops with many
       variables the best choice often is to use just one generic biv.  If we
       added here many ivs specific to the uses, the optimization algorithm later
@@ -5079,7 +5112,10 @@ try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
      {
        cand = iv_cand (data, i);
  
-      if (cand->iv->base_object != NULL_TREE)
+      if (originalp && cand->pos !=IP_ORIGINAL)
+       continue;
+
+      if (!originalp && cand->iv->base_object != NULL_TREE)
         continue;
  
        if (iv_ca_cand_used_p (ivs, cand))
@@ -5115,9 +5151,14 @@ try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
             continue;
  
           /* Already tried this.  */
-         if (cand->important && cand->iv->base_object == NULL_TREE)
-           continue;
-      
+         if (cand->important)
+           {
+             if (originalp && cand->pos == IP_ORIGINAL)
+               continue;
+             if (!originalp && cand->iv->base_object == NULL_TREE)
+               continue;
+           }
+
           if (iv_ca_cand_used_p (ivs, cand))
             continue;
  
@@ -5150,13 +5191,13 @@ try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
  /* Finds an initial assignment of candidates to uses.  */
  
  static struct iv_ca *
-get_initial_solution (struct ivopts_data *data)
+get_initial_solution (struct ivopts_data *data, bool originalp)
  {
    struct iv_ca *ivs = iv_ca_new (data);
    unsigned i;
  
    for (i = 0; i < n_iv_uses (data); i++)
-    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
+    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
        {
         iv_ca_free (&ivs);
         return NULL;
@@ -5179,7 +5220,7 @@ try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
    for (i = 0; i < n_iv_cands (data); i++)
      {
        cand = iv_cand (data, i);
-      
+
        if (iv_ca_cand_used_p (ivs, cand))
         continue;
  
@@ -5228,14 +5269,12 @@ try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
     solution and remove the unused ivs while this improves the cost.  */
  
  static struct iv_ca *
-find_optimal_iv_set (struct ivopts_data *data)
+find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
  {
-  unsigned i;
    struct iv_ca *set;
-  struct iv_use *use;
  
    /* Get the initial solution.  */
-  set = get_initial_solution (data);
+  set = get_initial_solution (data, originalp);
    if (!set)
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
@@ -5258,11 +5297,46 @@ find_optimal_iv_set (struct ivopts_data *data)
         }
      }
  
+  return set;
+}
+
+static struct iv_ca *
+find_optimal_iv_set (struct ivopts_data *data)
+{
+  unsigned i;
+  struct iv_ca *set, *origset;
+  struct iv_use *use;
+  comp_cost cost, origcost;
+
+  /* Determine the cost based on a strategy that starts with original IVs,
+     and try again using a strategy that prefers candidates not based
+     on any IVs.  */
+  origset = find_optimal_iv_set_1 (data, true);
+  set = find_optimal_iv_set_1 (data, false);
+
+  if (!origset && !set)
+    return NULL;
+
+  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
+  cost = set ? iv_ca_cost (set) : infinite_cost;
+
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
-      comp_cost cost = iv_ca_cost (set);
-      fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
+      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
+              origcost.cost, origcost.complexity);
+      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
+              cost.cost, cost.complexity);
+    }
+
+  /* Choose the one with the best cost.  */
+  if (compare_costs (origcost, cost) <= 0)
+    {
+      if (set)
+       iv_ca_free (&set);
+      set = origset;
      }
+  else if (origset)
+    iv_ca_free (&origset);
  
    for (i = 0; i < n_iv_uses (data); i++)
      {
@@ -5310,10 +5384,10 @@ create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
  
        /* Rewrite the increment so that it uses var_before directly.  */
        find_interesting_uses_op (data, cand->var_after)->selected = cand;
-      
+
        return;
      }
- 
+
    gimple_add_tmp_var (cand->var_before);
    add_referenced_var (cand->var_before);
  
@@ -5435,12 +5509,22 @@ rewrite_use_nonlinear_expr (struct ivopts_data *data,
        gcc_unreachable ();
      }
  
-  op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
-                                true, GSI_SAME_STMT);
+  if (!valid_gimple_rhs_p (comp)
+      || (gimple_code (use->stmt) != GIMPLE_PHI
+         /* We can't allow re-allocating the stmt as it might be pointed
+            to still.  */
+         && (get_gimple_rhs_num_ops (TREE_CODE (comp))
+             >= gimple_num_ops (gsi_stmt (bsi)))))
+    {
+      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
+                                      true, GSI_SAME_STMT);
+      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
+       duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
+    }
  
    if (gimple_code (use->stmt) == GIMPLE_PHI)
      {
-      ass = gimple_build_assign (tgt, op);
+      ass = gimple_build_assign (tgt, comp);
        gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
  
        bsi = gsi_for_stmt (use->stmt);
@@ -5448,7 +5532,7 @@ rewrite_use_nonlinear_expr (struct ivopts_data *data,
      }
    else
      {
-      gimple_assign_set_rhs_from_tree (&bsi, op);
+      gimple_assign_set_rhs_from_tree (&bsi, comp);
        use->stmt = gsi_stmt (bsi);
      }
  }
@@ -5496,10 +5580,43 @@ unshare_and_remove_ssa_names (tree ref)
  static void
  copy_ref_info (tree new_ref, tree old_ref)
  {
-  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
-    copy_mem_ref_info (new_ref, old_ref);
-  else
+  tree new_ptr_base = NULL_TREE;
+
+  if (TREE_CODE (old_ref) == TARGET_MEM_REF
+      && TREE_CODE (new_ref) == TARGET_MEM_REF)
+    TMR_ORIGINAL (new_ref) = TMR_ORIGINAL (old_ref);
+  else if (TREE_CODE (new_ref) == TARGET_MEM_REF)
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
+
+  TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
+  TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
+
+  if (TREE_CODE (new_ref) == TARGET_MEM_REF)
+    new_ptr_base = TMR_BASE (new_ref);
+  else if (TREE_CODE (new_ref) == MEM_REF)
+    new_ptr_base = TREE_OPERAND (new_ref, 0);
+
+  /* We can transfer points-to information from an old pointer
+     or decl base to the new one.  */
+  if (new_ptr_base
+      && TREE_CODE (new_ptr_base) == SSA_NAME
+      && POINTER_TYPE_P (TREE_TYPE (new_ptr_base))
+      && !SSA_NAME_PTR_INFO (new_ptr_base))
+    {
+      tree base = get_base_address (old_ref);
+      if ((INDIRECT_REF_P (base)
+          || TREE_CODE (base) == MEM_REF)
+         && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
+       duplicate_ssa_name_ptr_info
+           (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
+      else if (TREE_CODE (base) == VAR_DECL
+              || TREE_CODE (base) == PARM_DECL
+              || TREE_CODE (base) == RESULT_DECL)
+       {
+         struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
+         pt_solution_set_var (&pi->pt, base);
+       }
+    }
  }
  
  /* Rewrites USE (address that is an iv) using candidate CAND.  */
@@ -5510,6 +5627,7 @@ rewrite_use_address (struct ivopts_data *data,
  {
    aff_tree aff;
    gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
+  tree base_hint = NULL_TREE;
    tree ref;
    bool ok;
  
@@ -5517,7 +5635,23 @@ rewrite_use_address (struct ivopts_data *data,
    gcc_assert (ok);
    unshare_aff_combination (&aff);
  
-  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
+  /* To avoid undefined overflow problems, all IV candidates use unsigned
+     integer types.  The drawback is that this makes it impossible for
+     create_mem_ref to distinguish an IV that is based on a memory object
+     from one that represents simply an offset.
+
+     To work around this problem, we pass a hint to create_mem_ref that
+     indicates which variable (if any) in aff is an IV based on a memory
+     object.  Note that we only consider the candidate.  If this is not
+     based on an object, the base of the reference is in some subexpression
+     of the use -- but these will use pointer types, so they are recognized
+     by the create_mem_ref heuristics anyway.  */
+  if (cand->iv->base_object)
+    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
+
+  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p),
+                       reference_alias_ptr_type (*use->op_p),
+                       &aff, base_hint, data->speed);
    copy_ref_info (ref, *use->op_p);
    *use->op_p = ref;
  }
@@ -5590,7 +5724,7 @@ rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
        default:
         gcc_unreachable ();
      }
-  
+
    update_stmt (use->stmt);
  }
  
@@ -5730,6 +5864,25 @@ tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
    VEC_free (iv_cand_p, heap, data->iv_candidates);
  }
  
+/* Returns true if the loop body BODY includes any function calls.  */
+
+static bool
+loop_body_includes_call (basic_block *body, unsigned num_nodes)
+{
+  gimple_stmt_iterator gsi;
+  unsigned i;
+
+  for (i = 0; i < num_nodes; i++)
+    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
+      {
+       gimple stmt = gsi_stmt (gsi);
+       if (is_gimple_call (stmt)
+           && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
+         return true;
+      }
+  return false;
+}
+
  /* Optimizes the LOOP.  Returns true if anything changed.  */
  
  static bool
@@ -5747,7 +5900,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
        fprintf (dump_file, "Processing loop %d\n", loop->num);
-      
+
        exit = single_dom_exit (loop);
        if (exit)
         {
@@ -5761,6 +5914,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
      }
  
    body = get_loop_body (loop);
+  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
    renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
    free (body);
  
@@ -5791,7 +5945,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
    /* Create the new induction variables (item 4, part 1).  */
    create_new_ivs (data, iv_ca);
    iv_ca_free (&iv_ca);
-  
+
    /* Rewrite the uses (item 4, part 2).  */
    rewrite_uses (data);