* trans-expr.c (gfc_conv_expr): Move address taking...

[pf3gnuchains/gcc-fork.git] / gcc / tree-ssa-loop-ivopts.c
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c

index 88fc015..15a2dd7 100644 (file)
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -1,5 +1,5 @@
  /* Induction variable optimizations.
-   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
     Free Software Foundation, Inc.
  
  This file is part of GCC.
@@ -92,6 +92,12 @@ along with GCC; see the file COPYING3.  If not see
  #include "tree-inline.h"
  #include "tree-ssa-propagate.h"
  
+/* FIXME: add_cost and zero_cost defined in expmed.h conflict with local uses.
+ */
+#include "expmed.h"
+#undef add_cost
+#undef zero_cost
+
  /* FIXME: Expressions are expanded to RTL in this pass to determine the
     cost of different addressing modes.  This should be moved to a TBD
     interface between the GIMPLE and RTL worlds.  */
@@ -109,7 +115,7 @@ along with GCC; see the file COPYING3.  If not see
  static inline HOST_WIDE_INT
  avg_loop_niter (struct loop *loop)
  {
-  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
+  HOST_WIDE_INT niter = max_stmt_executions_int (loop, false);
    if (niter == -1)
      return AVG_LOOP_NITER (loop);
  
@@ -170,6 +176,7 @@ struct cost_pair
    tree value;          /* For final value elimination, the expression for
                            the final value of the iv.  For iv elimination,
                            the new bound to compare with.  */
+  enum tree_code comp; /* For iv elimination, the comparison.  */
    int inv_expr_id;      /* Loop invariant expression id.  */
  };
  
@@ -291,6 +298,9 @@ struct ivopts_data
  
    /* Whether the loop body includes any function calls.  */
    bool body_includes_call;
+
+  /* Whether the loop body can only be exited via single exit.  */
+  bool loop_single_exit_p;
  };
  
  /* An assignment of iv candidates to uses.  */
@@ -377,6 +387,8 @@ struct iv_ca_delta
  
  static VEC(tree,heap) *decl_rtl_to_reset;
  
+static comp_cost force_expr_to_var_cost (tree, bool);
+
  /* Number of uses recorded in DATA.  */
  
  static inline unsigned
@@ -762,15 +774,13 @@ contains_abnormal_ssa_name_p (tree expr)
    return false;
  }
  
-/*  Returns tree describing number of iterations determined from
+/*  Returns the structure describing number of iterations determined from
      EXIT of DATA->current_loop, or NULL if something goes wrong.  */
  
-static tree
-niter_for_exit (struct ivopts_data *data, edge exit,
-                struct tree_niter_desc **desc_p)
+static struct tree_niter_desc *
+niter_for_exit (struct ivopts_data *data, edge exit)
  {
-  struct tree_niter_desc* desc = NULL;
-  tree niter;
+  struct tree_niter_desc *desc;
    void **slot;
  
    if (!data->niters)
@@ -783,37 +793,31 @@ niter_for_exit (struct ivopts_data *data, edge exit,
  
    if (!slot)
      {
-      /* Try to determine number of iterations.  We must know it
-        unconditionally (i.e., without possibility of # of iterations
-        being zero).  Also, we cannot safely work with ssa names that
-        appear in phi nodes on abnormal edges, so that we do not create
-        overlapping life ranges for them (PR 27283).  */
+      /* Try to determine number of iterations.  We cannot safely work with ssa
+         names that appear in phi nodes on abnormal edges, so that we do not
+         create overlapping life ranges for them (PR 27283).  */
        desc = XNEW (struct tree_niter_desc);
-      if (number_of_iterations_exit (data->current_loop,
-                                    exit, desc, true)
-         && integer_zerop (desc->may_be_zero)
-         && !contains_abnormal_ssa_name_p (desc->niter))
-       niter = desc->niter;
-      else
-       niter = NULL_TREE;
-
-      desc->niter = niter;
+      if (!number_of_iterations_exit (data->current_loop,
+                                     exit, desc, true)
+         || contains_abnormal_ssa_name_p (desc->niter))
+       {
+         XDELETE (desc);
+         desc = NULL;
+       }
        slot = pointer_map_insert (data->niters, exit);
        *slot = desc;
      }
    else
-    niter = ((struct tree_niter_desc *) *slot)->niter;
+    desc = (struct tree_niter_desc *) *slot;
  
-  if (desc_p)
-    *desc_p = (struct tree_niter_desc *) *slot;
-  return niter;
+  return desc;
  }
  
-/* Returns tree describing number of iterations determined from
+/* Returns the structure describing number of iterations determined from
     single dominating exit of DATA->current_loop, or NULL if something
     goes wrong.  */
  
-static tree
+static struct tree_niter_desc *
  niter_for_single_dom_exit (struct ivopts_data *data)
  {
    edge exit = single_dom_exit (data->current_loop);
@@ -821,7 +825,7 @@ niter_for_single_dom_exit (struct ivopts_data *data)
    if (!exit)
      return NULL;
  
-  return niter_for_exit (data, exit, NULL);
+  return niter_for_exit (data, exit);
  }
  
  /* Hash table equality function for expressions.  */
@@ -834,7 +838,8 @@ htab_inv_expr_eq (const void *ent1, const void *ent2)
    const struct iv_inv_expr_ent *expr2 =
        (const struct iv_inv_expr_ent *)ent2;
  
-  return operand_equal_p (expr1->expr, expr2->expr, 0);
+  return expr1->hash == expr2->hash
+        && operand_equal_p (expr1->expr, expr2->expr, 0);
  }
  
  /* Hash function for loop invariant expressions.  */
@@ -1026,7 +1031,7 @@ find_bivs (struct ivopts_data *data)
        if (step)
         {
           if (POINTER_TYPE_P (type))
-           step = fold_convert (sizetype, step);
+           step = convert_to_ptrofftype (step);
           else
             step = fold_convert (type, step);
         }
@@ -1101,6 +1106,12 @@ find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
        || contains_abnormal_ssa_name_p (iv->step))
      return false;
  
+  /* If STMT could throw, then do not consider STMT as defining a GIV.  
+     While this will suppress optimizations, we can not safely delete this
+     GIV and associated statements, even if it appears it is not used.  */
+  if (stmt_could_throw_p (stmt))
+    return false;
+
    return true;
  }
  
@@ -1159,12 +1170,17 @@ find_induction_variables (struct ivopts_data *data)
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
-      tree niter = niter_for_single_dom_exit (data);
+      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
  
        if (niter)
         {
           fprintf (dump_file, "  number of iterations ");
-         print_generic_expr (dump_file, niter, TDF_SLIM);
+         print_generic_expr (dump_file, niter->niter, TDF_SLIM);
+         if (!integer_zerop (niter->may_be_zero))
+           {
+             fprintf (dump_file, "; zero if ");
+             print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
+           }
           fprintf (dump_file, "\n\n");
         };
  
@@ -1620,7 +1636,8 @@ may_be_unaligned_p (tree ref, tree step)
    base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                               &unsignedp, &volatilep, true);
    base_type = TREE_TYPE (base);
-  base_align = TYPE_ALIGN (base_type);
+  base_align = get_object_alignment (base);
+  base_align = MAX (base_align, TYPE_ALIGN (base_type));
  
    if (mode != BLKmode)
      {
@@ -2201,7 +2218,10 @@ add_candidate_1 (struct ivopts_data *data,
    struct iv_cand *cand = NULL;
    tree type, orig_type;
  
-  if (base)
+  /* For non-original variables, make sure their values are computed in a type
+     that does not invoke undefined behavior on overflows (since in general,
+     we cannot prove that these induction variables are non-wrapping).  */
+  if (pos != IP_ORIGINAL)
      {
        orig_type = TREE_TYPE (base);
        type = generic_type_for (orig_type);
@@ -2647,13 +2667,13 @@ infinite_cost_p (comp_cost cost)
  
  /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
     on invariants DEPENDS_ON and that the value used in expressing it
-   is VALUE.  */
+   is VALUE, and in case of iv elimination the comparison operator is COMP.  */
  
  static void
  set_use_iv_cost (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand,
                  comp_cost cost, bitmap depends_on, tree value,
-                 int inv_expr_id)
+                enum tree_code comp, int inv_expr_id)
  {
    unsigned i, s;
  
@@ -2669,6 +2689,7 @@ set_use_iv_cost (struct ivopts_data *data,
        use->cost_map[cand->id].cost = cost;
        use->cost_map[cand->id].depends_on = depends_on;
        use->cost_map[cand->id].value = value;
+      use->cost_map[cand->id].comp = comp;
        use->cost_map[cand->id].inv_expr_id = inv_expr_id;
        return;
      }
@@ -2689,6 +2710,7 @@ found:
    use->cost_map[i].cost = cost;
    use->cost_map[i].depends_on = depends_on;
    use->cost_map[i].value = value;
+  use->cost_map[i].comp = comp;
    use->cost_map[i].inv_expr_id = inv_expr_id;
  }
  
@@ -2738,7 +2760,7 @@ seq_cost (rtx seq, bool speed)
      {
        set = single_set (seq);
        if (set)
-       cost += rtx_cost (set, SET,speed);
+       cost += set_src_cost (SET_SRC (set), speed);
        else
         cost++;
      }
@@ -2842,7 +2864,7 @@ computation_cost (tree expr, bool speed)
    unsigned cost;
    /* Avoid using hard regs in ways which may be unsupported.  */
    int regno = LAST_VIRTUAL_REGISTER + 1;
-  struct cgraph_node *node = cgraph_node (current_function_decl);
+  struct cgraph_node *node = cgraph_get_node (current_function_decl);
    enum node_frequency real_frequency = node->frequency;
  
    node->frequency = NODE_FREQUENCY_NORMAL;
@@ -2859,6 +2881,8 @@ computation_cost (tree expr, bool speed)
    if (MEM_P (rslt))
      cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                           TYPE_ADDR_SPACE (type), speed);
+  else if (!REG_P (rslt))
+    cost += set_src_cost (rslt, speed);
  
    return cost;
  }
@@ -2874,26 +2898,6 @@ var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
      return cand->var_before;
  }
  
-/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
-   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
-
-int
-tree_int_cst_sign_bit (const_tree t)
-{
-  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
-  unsigned HOST_WIDE_INT w;
-
-  if (bitno < HOST_BITS_PER_WIDE_INT)
-    w = TREE_INT_CST_LOW (t);
-  else
-    {
-      w = TREE_INT_CST_HIGH (t);
-      bitno -= HOST_BITS_PER_WIDE_INT;
-    }
-
-  return (w >> bitno) & 1;
-}
-
  /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
     same precision that is at least as wide as the precision of TYPE, stores
     BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
@@ -3497,6 +3501,42 @@ get_address_cost (bool symbol_present, bool var_present,
    return new_cost (cost + acost, complexity);
  }
  
+ /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
+    EXPR operand holding the shift.  COST0 and COST1 are the costs for
+    calculating the operands of EXPR.  Returns true if successful, and returns
+    the cost in COST.  */
+
+static bool
+get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
+                   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
+{
+  comp_cost res;
+  tree op1 = TREE_OPERAND (expr, 1);
+  tree cst = TREE_OPERAND (mult, 1);
+  tree multop = TREE_OPERAND (mult, 0);
+  int m = exact_log2 (int_cst_value (cst));
+  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
+  int sa_cost;
+
+  if (!(m >= 0 && m < maxm))
+    return false;
+
+  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
+             ? shiftadd_cost[speed][mode][m]
+             : (mult == op1
+                ? shiftsub1_cost[speed][mode][m]
+                : shiftsub0_cost[speed][mode][m]));
+  res = new_cost (sa_cost, 0);
+  res = add_costs (res, mult == op1 ? cost0 : cost1);
+
+  STRIP_NOPS (multop);
+  if (!is_gimple_val (multop))
+    res = add_costs (res, force_expr_to_var_cost (multop, speed));
+
+  *cost = res;
+  return true;
+}
+
  /* Estimates cost of forcing expression EXPR into a variable.  */
  
  static comp_cost
@@ -3533,9 +3573,7 @@ force_expr_to_var_cost (tree expr, bool speed)
           symbol_cost[i] = computation_cost (addr, i) + 1;
  
           address_cost[i]
-           = computation_cost (build2 (POINTER_PLUS_EXPR, type,
-                                       addr,
-                                       build_int_cst (sizetype, 2000)), i) + 1;
+           = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
           if (dump_file && (dump_flags & TDF_DETAILS))
             {
               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
@@ -3622,6 +3660,21 @@ force_expr_to_var_cost (tree expr, bool speed)
      case MINUS_EXPR:
      case NEGATE_EXPR:
        cost = new_cost (add_cost (mode, speed), 0);
+      if (TREE_CODE (expr) != NEGATE_EXPR)
+        {
+          tree mult = NULL_TREE;
+          comp_cost sa_cost;
+          if (TREE_CODE (op1) == MULT_EXPR)
+            mult = op1;
+          else if (TREE_CODE (op0) == MULT_EXPR)
+            mult = op0;
+
+          if (mult != NULL_TREE
+              && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
+              && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed,
+                                    &sa_cost))
+            return sa_cost;
+        }
        break;
  
      case MULT_EXPR:
@@ -3828,6 +3881,28 @@ compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
    return true;
  }
  
+/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
+
+static int
+get_expr_id (struct ivopts_data *data, tree expr)
+{
+  struct iv_inv_expr_ent ent;
+  struct iv_inv_expr_ent **slot;
+
+  ent.expr = expr;
+  ent.hash = iterative_hash_expr (expr, 0);
+  slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
+                                                     &ent, INSERT);
+  if (*slot)
+    return (*slot)->id;
+
+  *slot = XNEW (struct iv_inv_expr_ent);
+  (*slot)->expr = expr;
+  (*slot)->hash = ent.hash;
+  (*slot)->id = data->inv_expr_id++;
+  return (*slot)->id;
+}
+
  /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
     requires a new compiler generated temporary.  Returns -1 otherwise.
     ADDRESS_P is a flag indicating if the expression is for address
@@ -3840,8 +3915,6 @@ get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
  {
    aff_tree ubase_aff, cbase_aff;
    tree expr, ub, cb;
-  struct iv_inv_expr_ent ent;
-  struct iv_inv_expr_ent **slot;
  
    STRIP_NOPS (ubase);
    STRIP_NOPS (cbase);
@@ -3929,18 +4002,7 @@ get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
    aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
    aff_combination_add (&ubase_aff, &cbase_aff);
    expr = aff_combination_to_tree (&ubase_aff);
-  ent.expr = expr;
-  ent.hash = iterative_hash_expr (expr, 0);
-  slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
-                                                     &ent, INSERT);
-  if (*slot)
-    return (*slot)->id;
-
-  *slot = XNEW (struct iv_inv_expr_ent);
-  (*slot)->expr = expr;
-  (*slot)->hash = ent.hash;
-  (*slot)->id = data->inv_expr_id++;
-  return  (*slot)->id;
+  return get_expr_id (data, expr);
  }
  
  
@@ -4027,6 +4089,8 @@ get_computation_cost_at (struct ivopts_data *data,
    STRIP_NOPS (cbase);
    ctype = TREE_TYPE (cbase);
  
+  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
+
    /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
       or ratio == 1, it is better to handle this like
  
@@ -4045,8 +4109,24 @@ get_computation_cost_at (struct ivopts_data *data,
      }
    else if (ratio == 1)
      {
+      tree real_cbase = cbase;
+
+      /* Check to see if any adjustment is needed.  */
+      if (cstepi == 0 && stmt_is_after_inc)
+        {
+          aff_tree real_cbase_aff;
+          aff_tree cstep_aff;
+
+          tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
+                                   &real_cbase_aff);
+          tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
+
+          aff_combination_add (&real_cbase_aff, &cstep_aff);
+          real_cbase = aff_combination_to_tree (&real_cbase_aff);
+        }
+
        cost = difference_cost (data,
-                             ubase, cbase,
+                             ubase, real_cbase,
                               &symbol_present, &var_present, &offset,
                               depends_on);
        cost.cost /= avg_loop_niter (data->current_loop);
@@ -4088,7 +4168,6 @@ get_computation_cost_at (struct ivopts_data *data,
  
    /* If we are after the increment, the value of the candidate is higher by
       one iteration.  */
-  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
    if (stmt_is_after_inc)
      offset -= ratio * cstepi;
  
@@ -4184,14 +4263,15 @@ determine_use_iv_cost_generic (struct ivopts_data *data,
    if (cand->pos == IP_ORIGINAL
        && cand->incremented_at == use->stmt)
      {
-      set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
+      set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE,
+                       ERROR_MARK, -1);
        return true;
      }
  
    cost = get_computation_cost (data, use, cand, false, &depends_on,
                                 NULL, &inv_expr_id);
  
-  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
+  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                     inv_expr_id);
  
    return !infinite_cost_p (cost);
@@ -4219,7 +4299,7 @@ determine_use_iv_cost_address (struct ivopts_data *data,
        else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
         cost = infinite_cost;
      }
-  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
+  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                     inv_expr_id);
  
    return !infinite_cost_p (cost);
@@ -4295,16 +4375,261 @@ iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
    return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
  }
  
+static tree
+strip_wrap_conserving_type_conversions (tree exp)
+{
+  while (tree_ssa_useless_type_conversion (exp)
+        && (nowrap_type_p (TREE_TYPE (exp))
+            == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
+    exp = TREE_OPERAND (exp, 0);
+  return exp;
+}
+
+/* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
+   check for an exact match.  */
+
+static bool
+expr_equal_p (tree e, tree what)
+{
+  gimple stmt;
+  enum tree_code code;
+
+  e = strip_wrap_conserving_type_conversions (e);
+  what = strip_wrap_conserving_type_conversions (what);
+
+  code = TREE_CODE (what);
+  if (TREE_TYPE (e) != TREE_TYPE (what))
+    return false;
+
+  if (operand_equal_p (e, what, 0))
+    return true;
+
+  if (TREE_CODE (e) != SSA_NAME)
+    return false;
+
+  stmt = SSA_NAME_DEF_STMT (e);
+  if (gimple_code (stmt) != GIMPLE_ASSIGN
+      || gimple_assign_rhs_code (stmt) != code)
+    return false;
+
+  switch (get_gimple_rhs_class (code))
+    {
+    case GIMPLE_BINARY_RHS:
+      if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
+       return false;
+      /* Fallthru.  */
+
+    case GIMPLE_UNARY_RHS:
+    case GIMPLE_SINGLE_RHS:
+      return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
+    default:
+      return false;
+    }
+}
+
+/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
+   we only detect the situation that BASE = SOMETHING + OFFSET, where the
+   calculation is performed in non-wrapping type.
+
+   TODO: More generally, we could test for the situation that
+        BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
+        This would require knowing the sign of OFFSET.
+
+        Also, we only look for the first addition in the computation of BASE.
+        More complex analysis would be better, but introducing it just for
+        this optimization seems like an overkill.  */
+
+static bool
+difference_cannot_overflow_p (tree base, tree offset)
+{
+  enum tree_code code;
+  tree e1, e2;
+
+  if (!nowrap_type_p (TREE_TYPE (base)))
+    return false;
+
+  base = expand_simple_operations (base);
+
+  if (TREE_CODE (base) == SSA_NAME)
+    {
+      gimple stmt = SSA_NAME_DEF_STMT (base);
+
+      if (gimple_code (stmt) != GIMPLE_ASSIGN)
+       return false;
+
+      code = gimple_assign_rhs_code (stmt);
+      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
+       return false;
+
+      e1 = gimple_assign_rhs1 (stmt);
+      e2 = gimple_assign_rhs2 (stmt);
+    }
+  else
+    {
+      code = TREE_CODE (base);
+      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
+       return false;
+      e1 = TREE_OPERAND (base, 0);
+      e2 = TREE_OPERAND (base, 1);
+    }
+
+  /* TODO: deeper inspection may be necessary to prove the equality.  */
+  switch (code)
+    {
+    case PLUS_EXPR:
+      return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
+    case POINTER_PLUS_EXPR:
+      return expr_equal_p (e2, offset);
+
+    default:
+      return false;
+    }
+}
+
+/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
+   comparison with CAND.  NITER describes the number of iterations of
+   the loop.  If successful, the comparison in COMP_P is altered accordingly.
+
+   We aim to handle the following situation:
+
+   sometype *base, *p;
+   int a, b, i;
+
+   i = a;
+   p = p_0 = base + a;
+
+   do
+     {
+       bla (*p);
+       p++;
+       i++;
+     }
+   while (i < b);
+
+   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
+   We aim to optimize this to
+
+   p = p_0 = base + a;
+   do
+     {
+       bla (*p);
+       p++;
+     }
+   while (p < p_0 - a + b);
+
+   This preserves the correctness, since the pointer arithmetic does not
+   overflow.  More precisely:
+
+   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
+      overflow in computing it or the values of p.
+   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
+      overflow.  To prove this, we use the fact that p_0 = base + a.  */
+
+static bool
+iv_elimination_compare_lt (struct ivopts_data *data,
+                           struct iv_cand *cand, enum tree_code *comp_p,
+                          struct tree_niter_desc *niter)
+{
+  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
+  struct affine_tree_combination nit, tmpa, tmpb;
+  enum tree_code comp;
+  HOST_WIDE_INT step;
+
+  /* We need to know that the candidate induction variable does not overflow.
+     While more complex analysis may be used to prove this, for now just
+     check that the variable appears in the original program and that it
+     is computed in a type that guarantees no overflows.  */
+  cand_type = TREE_TYPE (cand->iv->base);
+  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
+    return false;
+
+  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
+     the calculation of the BOUND could overflow, making the comparison
+     invalid.  */
+  if (!data->loop_single_exit_p)
+    return false;
+
+  /* We need to be able to decide whether candidate is increasing or decreasing
+     in order to choose the right comparison operator.  */
+  if (!cst_and_fits_in_hwi (cand->iv->step))
+    return false;
+  step = int_cst_value (cand->iv->step);
+
+  /* Check that the number of iterations matches the expected pattern:
+     a + 1 > b ? 0 : b - a - 1.  */
+  mbz = niter->may_be_zero;
+  if (TREE_CODE (mbz) == GT_EXPR)
+    {
+      /* Handle a + 1 > b.  */
+      tree op0 = TREE_OPERAND (mbz, 0);
+      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
+       {
+         a = TREE_OPERAND (op0, 0);
+         b = TREE_OPERAND (mbz, 1);
+       }
+      else
+       return false;
+    }
+  else if (TREE_CODE (mbz) == LT_EXPR)
+    {
+      tree op1 = TREE_OPERAND (mbz, 1);
+
+      /* Handle b < a + 1.  */
+      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
+        {
+          a = TREE_OPERAND (op1, 0);
+          b = TREE_OPERAND (mbz, 0);
+        }
+      else
+       return false;
+    }
+  else
+    return false;
+
+  /* Expected number of iterations is B - A - 1.  Check that it matches
+     the actual number, i.e., that B - A - NITER = 1.  */
+  tree_to_aff_combination (niter->niter, nit_type, &nit);
+  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
+  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
+  aff_combination_scale (&nit, double_int_minus_one);
+  aff_combination_scale (&tmpa, double_int_minus_one);
+  aff_combination_add (&tmpb, &tmpa);
+  aff_combination_add (&tmpb, &nit);
+  if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one))
+    return false;
+
+  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
+     overflow.  */
+  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
+                       cand->iv->step,
+                       fold_convert (TREE_TYPE (cand->iv->step), a));
+  if (!difference_cannot_overflow_p (cand->iv->base, offset))
+    return false;
+
+  /* Determine the new comparison operator.  */
+  comp = step < 0 ? GT_EXPR : LT_EXPR;
+  if (*comp_p == NE_EXPR)
+    *comp_p = comp;
+  else if (*comp_p == EQ_EXPR)
+    *comp_p = invert_tree_comparison (comp, false);
+  else
+    gcc_unreachable ();
+
+  return true;
+}
+
  /* Check whether it is possible to express the condition in USE by comparison
-   of candidate CAND.  If so, store the value compared with to BOUND.  */
+   of candidate CAND.  If so, store the value compared with to BOUND, and the
+   comparison operator to COMP.  */
  
  static bool
  may_eliminate_iv (struct ivopts_data *data,
-                 struct iv_use *use, struct iv_cand *cand, tree *bound)
+                 struct iv_use *use, struct iv_cand *cand, tree *bound,
+                 enum tree_code *comp)
  {
    basic_block ex_bb;
    edge exit;
-  tree nit, period;
+  tree period;
    struct loop *loop = data->current_loop;
    aff_tree bnd;
    struct tree_niter_desc *desc = NULL;
@@ -4326,8 +4651,8 @@ may_eliminate_iv (struct ivopts_data *data,
    if (flow_bb_inside_loop_p (loop, exit->dest))
      return false;
  
-  nit = niter_for_exit (data, exit, &desc);
-  if (!nit)
+  desc = niter_for_exit (data, exit);
+  if (!desc)
      return false;
  
    /* Determine whether we can use the variable to test the exit condition.
@@ -4336,17 +4661,17 @@ may_eliminate_iv (struct ivopts_data *data,
    period = iv_period (cand->iv);
  
    /* If the number of iterations is constant, compare against it directly.  */
-  if (TREE_CODE (nit) == INTEGER_CST)
+  if (TREE_CODE (desc->niter) == INTEGER_CST)
      {
        /* See cand_value_at.  */
        if (stmt_after_increment (loop, cand, use->stmt))
          {
-          if (!tree_int_cst_lt (nit, period))
+          if (!tree_int_cst_lt (desc->niter, period))
              return false;
          }
        else
          {
-          if (tree_int_cst_lt (period, nit))
+          if (tree_int_cst_lt (period, desc->niter))
              return false;
          }
      }
@@ -4365,7 +4690,7 @@ may_eliminate_iv (struct ivopts_data *data,
        if (double_int_ucmp (max_niter, period_value) > 0)
          {
            /* See if we can take advantage of infered loop bound information.  */
-          if (loop_only_exit_p (loop, exit))
+          if (data->loop_single_exit_p)
              {
                if (!estimated_loop_iterations (loop, true, &max_niter))
                  return false;
@@ -4378,16 +4703,46 @@ may_eliminate_iv (struct ivopts_data *data,
          }
      }
  
-  cand_value_at (loop, cand, use->stmt, nit, &bnd);
+  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
  
    *bound = aff_combination_to_tree (&bnd);
+  *comp = iv_elimination_compare (data, use);
+
    /* It is unlikely that computing the number of iterations using division
       would be more profitable than keeping the original induction variable.  */
    if (expression_expensive_p (*bound))
      return false;
+
+  /* Sometimes, it is possible to handle the situation that the number of
+     iterations may be zero unless additional assumptions hold, by using <
+     instead of != in the exit condition.
+
+     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
+          base the exit condition on it.  However, that is often too
+          expensive.  */
+  if (!integer_zerop (desc->may_be_zero))
+    return iv_elimination_compare_lt (data, cand, comp, desc);
+
    return true;
  }
  
+ /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
+    be copied, if it is used in the loop body and DATA->body_includes_call.  */
+
+static int
+parm_decl_cost (struct ivopts_data *data, tree bound)
+{
+  tree sbound = bound;
+  STRIP_NOPS (sbound);
+
+  if (TREE_CODE (sbound) == SSA_NAME
+      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
+      && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
+      && data->body_includes_call)
+    return COSTS_N_INSNS (1);
+
+  return 0;
+}
  
  /* Determines cost of basing replacement of USE on CAND in a condition.  */
  
@@ -4398,22 +4753,39 @@ determine_use_iv_cost_condition (struct ivopts_data *data,
    tree bound = NULL_TREE;
    struct iv *cmp_iv;
    bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
-  comp_cost elim_cost, express_cost, cost;
+  comp_cost elim_cost, express_cost, cost, bound_cost;
    bool ok;
-  int inv_expr_id = -1;
+  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
    tree *control_var, *bound_cst;
+  enum tree_code comp = ERROR_MARK;
  
    /* Only consider real candidates.  */
    if (!cand->iv)
      {
-      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
+      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
+                      ERROR_MARK, -1);
        return false;
      }
  
    /* Try iv elimination.  */
-  if (may_eliminate_iv (data, use, cand, &bound))
+  if (may_eliminate_iv (data, use, cand, &bound, &comp))
      {
        elim_cost = force_var_cost (data, bound, &depends_on_elim);
+      if (elim_cost.cost == 0)
+        elim_cost.cost = parm_decl_cost (data, bound);
+      else if (TREE_CODE (bound) == INTEGER_CST)
+        elim_cost.cost = 0;
+      /* If we replace a loop condition 'i < n' with 'p < base + n',
+        depends_on_elim will have 'base' and 'n' set, which implies
+        that both 'base' and 'n' will be live during the loop.  More likely,
+        'base + n' will be loop invariant, resulting in only one live value
+        during the loop.  So in that case we clear depends_on_elim and set
+        elim_inv_expr_id instead.  */
+      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
+       {
+         elim_inv_expr_id = get_expr_id (data, bound);
+         bitmap_clear (depends_on_elim);
+       }
        /* The bound is a loop invariant, so it will be only computed
          once.  */
        elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
@@ -4441,16 +4813,25 @@ determine_use_iv_cost_condition (struct ivopts_data *data,
  
    express_cost = get_computation_cost (data, use, cand, false,
                                        &depends_on_express, NULL,
-                                       &inv_expr_id);
+                                       &express_inv_expr_id);
    fd_ivopts_data = data;
    walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
  
+  /* Count the cost of the original bound as well.  */
+  bound_cost = force_var_cost (data, *bound_cst, NULL);
+  if (bound_cost.cost == 0)
+    bound_cost.cost = parm_decl_cost (data, *bound_cst);
+  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
+    bound_cost.cost = 0;
+  express_cost.cost += bound_cost.cost;
+
    /* Choose the better approach, preferring the eliminated IV. */
    if (compare_costs (elim_cost, express_cost) <= 0)
      {
        cost = elim_cost;
        depends_on = depends_on_elim;
        depends_on_elim = NULL;
+      inv_expr_id = elim_inv_expr_id;
      }
    else
      {
@@ -4458,9 +4839,11 @@ determine_use_iv_cost_condition (struct ivopts_data *data,
        depends_on = depends_on_express;
        depends_on_express = NULL;
        bound = NULL_TREE;
+      comp = ERROR_MARK;
+      inv_expr_id = express_inv_expr_id;
      }
  
-  set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
+  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
  
    if (depends_on_elim)
      BITMAP_FREE (depends_on_elim);
@@ -4664,6 +5047,11 @@ determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
  
    base = cand->iv->base;
    cost_base = force_var_cost (data, base, NULL);
+  /* It will be exceptional that the iv register happens to be initialized with
+     the proper value at no cost.  In general, there will at least be a regcopy
+     or a const set.  */
+  if (cost_base.cost == 0)
+    cost_base.cost = COSTS_N_INSNS (1);
    cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
  
    cost = cost_step + adjust_setup_cost (data, cost_base.cost);
@@ -5890,64 +6278,6 @@ rewrite_use_nonlinear_expr (struct ivopts_data *data,
      }
  }
  
-/* Copies the reference information from OLD_REF to NEW_REF.  */
-
-static void
-copy_ref_info (tree new_ref, tree old_ref)
-{
-  tree new_ptr_base = NULL_TREE;
-
-  TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
-  TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
-
-  new_ptr_base = TREE_OPERAND (new_ref, 0);
-
-  /* We can transfer points-to information from an old pointer
-     or decl base to the new one.  */
-  if (new_ptr_base
-      && TREE_CODE (new_ptr_base) == SSA_NAME
-      && !SSA_NAME_PTR_INFO (new_ptr_base))
-    {
-      tree base = get_base_address (old_ref);
-      if (!base)
-       ;
-      else if ((TREE_CODE (base) == MEM_REF
-               || TREE_CODE (base) == TARGET_MEM_REF)
-              && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
-              && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
-       {
-         struct ptr_info_def *new_pi;
-         duplicate_ssa_name_ptr_info
-           (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
-         new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
-         /* We have to be careful about transfering alignment information.  */
-         if (TREE_CODE (old_ref) == MEM_REF
-             && !(TREE_CODE (new_ref) == TARGET_MEM_REF
-                  && (TMR_INDEX2 (new_ref)
-                      || (TMR_STEP (new_ref)
-                          && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
-                              < new_pi->align)))))
-           {
-             new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
-                                                 mem_ref_offset (new_ref)).low;
-             new_pi->misalign &= (new_pi->align - 1);
-           }
-         else
-           {
-             new_pi->align = 1;
-             new_pi->misalign = 0;
-           }
-       }
-      else if (TREE_CODE (base) == VAR_DECL
-              || TREE_CODE (base) == PARM_DECL
-              || TREE_CODE (base) == RESULT_DECL)
-       {
-         struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
-         pt_solution_set_var (&pi->pt, base);
-       }
-    }
-}
-
  /* Performs a peephole optimization to reorder the iv update statement with
     a mem ref to enable instruction combining in later phases. The mem ref uses
     the iv value before the update, so the reordering transformation requires
@@ -6095,7 +6425,7 @@ rewrite_use_compare (struct ivopts_data *data,
            fprintf (dump_file, "Replacing exit test: ");
            print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
          }
-      compare = iv_elimination_compare (data, use);
+      compare = cp->comp;
        bound = unshare_expr (fold_convert (var_type, bound));
        op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
        if (stmts)
@@ -6230,8 +6560,7 @@ free_loop_data (struct ivopts_data *data)
        struct version_info *info;
  
        info = ver_info (data, i);
-      if (info->iv)
-       free (info->iv);
+      free (info->iv);
        info->iv = NULL;
        info->has_nonlin_use = false;
        info->preserve_biv = false;
@@ -6258,8 +6587,7 @@ free_loop_data (struct ivopts_data *data)
      {
        struct iv_cand *cand = iv_cand (data, i);
  
-      if (cand->iv)
-       free (cand->iv);
+      free (cand->iv);
        if (cand->depends_on)
         BITMAP_FREE (cand->depends_on);
        free (cand);
@@ -6327,7 +6655,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
  {
    bool changed = false;
    struct iv_ca *iv_ca;
-  edge exit;
+  edge exit = single_dom_exit (loop);
    basic_block *body;
  
    gcc_assert (!data->niters);
@@ -6338,7 +6666,6 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
      {
        fprintf (dump_file, "Processing loop %d\n", loop->num);
  
-      exit = single_dom_exit (loop);
        if (exit)
         {
           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
@@ -6355,6 +6682,8 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
    renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
    free (body);
  
+  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
+
    /* For each ssa name determines whether it behaves as an induction variable
       in some loop.  */
    if (!find_induction_variables (data))