PR c++/15815

[pf3gnuchains/gcc-fork.git] / gcc / combine.c
diff --git a/gcc/combine.c b/gcc/combine.c

index f828580..9f322d5 100644 (file)
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -91,6 +91,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  #include "toplev.h"
  #include "target.h"
  #include "rtlhooks-def.h"
+/* Include output.h for dump_file.  */
+#include "output.h"
  
  /* Number of attempts to combine instructions in this function.  */
  
@@ -282,6 +284,15 @@ static basic_block this_basic_block;
     those blocks as starting points.  */
  static sbitmap refresh_blocks;
  \f
+/* The following array records the combine_insn_cost for every insn
+   in the instruction stream.  */
+
+static int *uid_insn_cost;
+
+/* Largest insn UID that has an entry in uid_insn_cost; the array is
+   allocated with last_insn_cost + 1 elements.  */
+
+static int last_insn_cost;
+
  /* Incremented for each label.  */
  
  static int label_tick;
@@ -504,6 +515,135 @@ do_SUBST_INT (int *into, int newval)
  
  #define SUBST_INT(INTO, NEWVAL)  do_SUBST_INT(&(INTO), (NEWVAL))
  \f
+/* Estimate the cost of the single instruction whose pattern is PAT by
+   applying rtx_cost to the source of its one SET.  Zero is returned when
+   no cost can be determined: PAT is neither a SET nor a PARALLEL, or it
+   is a PARALLEL that holds no SET or more than one SET.  A known cost is
+   always positive.  */
+
+static int
+combine_insn_cost (rtx pat)
+{
+  rtx the_set = NULL_RTX;
+  int src_cost;
+
+  /* Find the lone SET by hand; single_set cannot be used here because
+     only the pattern is available, not the insn.  */
+  switch (GET_CODE (pat))
+    {
+    case SET:
+      the_set = pat;
+      break;
+
+    case PARALLEL:
+      {
+        int n_elts = XVECLEN (pat, 0);
+        int idx;
+
+        for (idx = 0; idx < n_elts; idx++)
+          {
+            rtx elt = XVECEXP (pat, 0, idx);
+
+            if (GET_CODE (elt) != SET)
+              continue;
+
+            /* A second SET makes the cost unknown.  */
+            if (the_set != NULL_RTX)
+              return 0;
+            the_set = elt;
+          }
+      }
+      break;
+
+    default:
+      return 0;
+    }
+
+  /* A PARALLEL without any SET has no known cost either.  */
+  if (the_set == NULL_RTX)
+    return 0;
+
+  /* Never report a non-positive cost for a recognized SET, since zero
+     is reserved for "unknown"; fall back to one instruction.  */
+  src_cost = rtx_cost (SET_SRC (the_set), SET);
+  if (src_cost > 0)
+    return src_cost;
+  return COSTS_N_INSNS (1);
+}
+
+/* Subroutine of try_combine.  Determine whether the combine replacement
+   patterns NEWPAT (the new pattern for I3) and NEWI2PAT (the new pattern
+   for I2) are acceptable according to combine_insn_cost when compared
+   with the original instruction sequence I1, I2 and I3.  Note that I1
+   and/or NEWI2PAT may be NULL_RTX.
+
+   The original costs are looked up in uid_insn_cost.  An insn whose UID
+   lies beyond last_insn_cost is treated as having an unknown (zero) cost,
+   and a sequence that contains any insn of unknown cost has an unknown
+   cost as a whole.
+
+   This function returns false only if undobuf.other_insn is not set, the
+   cost of the original sequence is known, and the replacements are
+   strictly more expensive than the original sequence.  Otherwise the
+   replacement costs are recorded in uid_insn_cost and true is
+   returned.  */
+
+static bool
+combine_validate_cost (rtx i1, rtx i2, rtx i3, rtx newpat, rtx newi2pat)
+{
+  int i1_cost, i2_cost, i3_cost;
+  int new_i2_cost, new_i3_cost;
+  int old_cost, new_cost;
+
+  /* Lookup the original combine_insn_costs.  */
+  i2_cost = INSN_UID (i2) <= last_insn_cost
+            ? uid_insn_cost[INSN_UID (i2)] : 0;
+  i3_cost = INSN_UID (i3) <= last_insn_cost
+            ? uid_insn_cost[INSN_UID (i3)] : 0;
+
+  if (i1)
+    {
+      i1_cost = INSN_UID (i1) <= last_insn_cost
+                ? uid_insn_cost[INSN_UID (i1)] : 0;
+      old_cost = (i1_cost > 0 && i2_cost > 0 && i3_cost > 0)
+                 ? i1_cost + i2_cost + i3_cost : 0;
+    }
+  else
+    {
+      old_cost = (i2_cost > 0 && i3_cost > 0) ? i2_cost + i3_cost : 0;
+      i1_cost = 0;
+    }
+
+  /* Calculate the replacement combine_insn_costs.  */
+  new_i3_cost = combine_insn_cost (newpat);
+  if (newi2pat)
+    {
+      new_i2_cost = combine_insn_cost (newi2pat);
+      new_cost = (new_i2_cost > 0 && new_i3_cost > 0)
+                 ? new_i2_cost + new_i3_cost : 0;
+    }
+  else
+    {
+      new_cost = new_i3_cost;
+      new_i2_cost = 0;
+    }
+
+  /* Disallow this recombination if the original cost is known and the
+     replacement cost is greater than it.  An unknown (zero) replacement
+     cost can never exceed a positive old_cost, so it is accepted.  */
+  if (!undobuf.other_insn
+      && old_cost > 0
+      && new_cost > old_cost)
+    {
+      if (dump_file)
+        {
+          if (i1)
+            {
+              fprintf (dump_file,
+                       "rejecting combination of insns %d, %d and %d\n",
+                       INSN_UID (i1), INSN_UID (i2), INSN_UID (i3));
+              fprintf (dump_file, "original costs %d + %d + %d = %d\n",
+                       i1_cost, i2_cost, i3_cost, old_cost);
+            }
+          else
+            {
+              fprintf (dump_file,
+                       "rejecting combination of insns %d and %d\n",
+                       INSN_UID (i2), INSN_UID (i3));
+              fprintf (dump_file, "original costs %d + %d = %d\n",
+                       i2_cost, i3_cost, old_cost);
+            }
+
+          if (newi2pat)
+            fprintf (dump_file, "replacement costs %d + %d = %d\n",
+                     new_i2_cost, new_i3_cost, new_cost);
+          else
+            fprintf (dump_file, "replacement cost %d\n", new_cost);
+        }
+
+      return false;
+    }
+
+  /* Update the uid_insn_cost array with the replacement costs.  Apply
+     the same bounds check as the lookups above, so that an insn whose
+     UID has no slot in the array is never written past its end.  */
+  if (INSN_UID (i2) <= last_insn_cost)
+    uid_insn_cost[INSN_UID (i2)] = new_i2_cost;
+  if (INSN_UID (i3) <= last_insn_cost)
+    uid_insn_cost[INSN_UID (i3)] = new_i3_cost;
+  if (i1 && INSN_UID (i1) <= last_insn_cost)
+    uid_insn_cost[INSN_UID (i1)] = 0;
+
+  return true;
+}
+\f
  /* Main entry point for combiner.  F is the first insn of the function.
     NREGS is the first unused pseudo-reg number.
  
@@ -568,6 +708,10 @@ combine_instructions (rtx f, unsigned int nregs)
    refresh_blocks = sbitmap_alloc (last_basic_block);
    sbitmap_zero (refresh_blocks);
  
+  /* Allocate array of current combine_insn_costs.  */
+  uid_insn_cost = xcalloc (max_uid_cuid + 1, sizeof (int));
+  last_insn_cost = max_uid_cuid;
+
    for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
      {
        uid_cuid[INSN_UID (insn)] = ++i;
@@ -586,6 +730,12 @@ combine_instructions (rtx f, unsigned int nregs)
               set_nonzero_bits_and_sign_copies (XEXP (links, 0), NULL_RTX,
                                                 NULL);
  #endif
+
+         /* Record the current combine_insn_cost of this instruction.  */
+         uid_insn_cost[INSN_UID (insn)] = combine_insn_cost (PATTERN (insn));
+         if (dump_file)
+           fprintf(dump_file, "insn_cost %d: %d\n",
+                   INSN_UID (insn), uid_insn_cost[INSN_UID (insn)]);
         }
  
        if (GET_CODE (insn) == CODE_LABEL)
@@ -762,6 +912,7 @@ combine_instructions (rtx f, unsigned int nregs)
  
    /* Clean up.  */
    sbitmap_free (refresh_blocks);
+  free (uid_insn_cost);
    free (reg_stat);
    free (uid_cuid);
  
@@ -1083,7 +1234,7 @@ can_combine_p (rtx insn, rtx i3, rtx pred ATTRIBUTE_UNUSED, rtx succ,
          are intervening stores.  Also, don't move a volatile asm or
          UNSPEC_VOLATILE across any other insns.  */
        || (! all_adjacent
-         && (((GET_CODE (src) != MEM
+         && (((!MEM_P (src)
                 || ! find_reg_note (insn, REG_EQUIV, src))
                && use_crosses_set_p (src, INSN_CUID (insn)))
               || (GET_CODE (src) == ASM_OPERANDS && MEM_VOLATILE_P (src))
@@ -1282,7 +1433,7 @@ combinable_i3pat (rtx i3, rtx *loc, rtx i2dest, rtx i1dest,
          into the address of a MEM, so only prevent the combination if
          i1 or i2 set the same MEM.  */
        if ((inner_dest != dest &&
-          (GET_CODE (inner_dest) != MEM
+          (!MEM_P (inner_dest)
             || rtx_equal_p (i2dest, inner_dest)
             || (i1dest && rtx_equal_p (i1dest, inner_dest)))
            && (reg_overlap_mentioned_p (i2dest, inner_dest)
@@ -1763,7 +1914,7 @@ try_combine (rtx i3, rtx i2, rtx i1, int *new_direct_jump_p)
  #if 0
    if (!(GET_CODE (PATTERN (i3)) == SET
         && REG_P (SET_SRC (PATTERN (i3)))
-       && GET_CODE (SET_DEST (PATTERN (i3))) == MEM
+       && MEM_P (SET_DEST (PATTERN (i3)))
         && (GET_CODE (XEXP (SET_DEST (PATTERN (i3)), 0)) == POST_INC
             || GET_CODE (XEXP (SET_DEST (PATTERN (i3)), 0)) == POST_DEC)))
      /* It's not the exception.  */
@@ -2263,7 +2414,7 @@ try_combine (rtx i3, rtx i2, rtx i1, int *new_direct_jump_p)
  #ifdef INSN_SCHEDULING
           /* If *SPLIT is a paradoxical SUBREG, when we split it, it should
              be written as a ZERO_EXTEND.  */
-         if (split_code == SUBREG && GET_CODE (SUBREG_REG (*split)) == MEM)
+         if (split_code == SUBREG && MEM_P (SUBREG_REG (*split)))
             {
  #ifdef LOAD_EXTEND_OP
               /* Or as a SIGN_EXTEND if LOAD_EXTEND_OP says that that's
@@ -2504,6 +2655,14 @@ try_combine (rtx i3, rtx i2, rtx i1, int *new_direct_jump_p)
    }
  #endif
  
+  /* Only allow this combination if combine_validate_cost does not find
+     the replacement instructions more expensive than the originals.  */
+  if (!combine_validate_cost (i1, i2, i3, newpat, newi2pat))
+    {
+      undo_all ();
+      return 0;
+    }
+
    /* We now know that we can do this combination.  Merge the insns and
       update the status of registers and LOG_LINKS.  */
  
@@ -2611,19 +2770,13 @@ try_combine (rtx i3, rtx i2, rtx i1, int *new_direct_jump_p)
         PATTERN (i2) = newi2pat;
        }
      else
-      {
-       PUT_CODE (i2, NOTE);
-       NOTE_LINE_NUMBER (i2) = NOTE_INSN_DELETED;
-       NOTE_SOURCE_FILE (i2) = 0;
-      }
+      SET_INSN_DELETED (i2);
  
      if (i1)
        {
         LOG_LINKS (i1) = 0;
         REG_NOTES (i1) = 0;
-       PUT_CODE (i1, NOTE);
-       NOTE_LINE_NUMBER (i1) = NOTE_INSN_DELETED;
-       NOTE_SOURCE_FILE (i1) = 0;
+       SET_INSN_DELETED (i1);
        }
  
      /* Get death notes for everything that is now used in either I3 or
@@ -2893,7 +3046,7 @@ find_split_point (rtx *loc, rtx insn)
  #ifdef INSN_SCHEDULING
        /* If we are making a paradoxical SUBREG invalid, it becomes a split
          point.  */
-      if (GET_CODE (SUBREG_REG (x)) == MEM)
+      if (MEM_P (SUBREG_REG (x)))
         return loc;
  #endif
        return find_split_point (&SUBREG_REG (x), insn);
@@ -3842,7 +3995,7 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
  
        /* Don't change the mode of the MEM if that would change the meaning
          of the address.  */
-      if (GET_CODE (SUBREG_REG (x)) == MEM
+      if (MEM_P (SUBREG_REG (x))
           && (MEM_VOLATILE_P (SUBREG_REG (x))
               || mode_dependent_address_p (XEXP (SUBREG_REG (x), 0))))
         return gen_rtx_CLOBBER (mode, const0_rtx);
@@ -5205,7 +5358,7 @@ simplify_set (rtx x)
        && SUBREG_BYTE (src) == 0
        && (GET_MODE_SIZE (GET_MODE (src))
           > GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))
-      && GET_CODE (SUBREG_REG (src)) == MEM)
+      && MEM_P (SUBREG_REG (src)))
      {
        SUBST (SET_SRC (x),
              gen_rtx_fmt_e (LOAD_EXTEND_OP (GET_MODE (SUBREG_REG (src))),
@@ -5986,7 +6139,7 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos,
          The subreg adds or removes high bits; its mode is
          irrelevant to the meaning of this extraction,
          since POS and LEN count from the lsb.  */
-      if (GET_CODE (SUBREG_REG (inner)) == MEM)
+      if (MEM_P (SUBREG_REG (inner)))
         is_mode = GET_MODE (SUBREG_REG (inner));
        inner = SUBREG_REG (inner);
      }
@@ -6027,11 +6180,11 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos,
    if (tmode != BLKmode
        && ! (spans_byte && inner_mode != tmode)
        && ((pos_rtx == 0 && (pos % BITS_PER_WORD) == 0
-          && GET_CODE (inner) != MEM
+          && !MEM_P (inner)
            && (! in_dest
                || (REG_P (inner)
                    && have_insn_for (STRICT_LOW_PART, tmode))))
-         || (GET_CODE (inner) == MEM && pos_rtx == 0
+         || (MEM_P (inner) && pos_rtx == 0
               && (pos
                   % (STRICT_ALIGNMENT ? GET_MODE_ALIGNMENT (tmode)
                      : BITS_PER_UNIT)) == 0
@@ -6049,7 +6202,7 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos,
          If INNER is not a MEM, get a piece consisting of just the field
          of interest (in this case POS % BITS_PER_WORD must be 0).  */
  
-      if (GET_CODE (inner) == MEM)
+      if (MEM_P (inner))
         {
           HOST_WIDE_INT offset;
  
@@ -6108,7 +6261,7 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos,
          make a STRICT_LOW_PART unless we made a MEM.  */
  
        if (in_dest)
-       return (GET_CODE (new) == MEM ? new
+       return (MEM_P (new) ? new
                 : (GET_CODE (new) != SUBREG
                    ? gen_rtx_CLOBBER (tmode, const0_rtx)
                    : gen_rtx_STRICT_LOW_PART (VOIDmode, new)));
@@ -6159,7 +6312,7 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos,
       length is not 1.  In all other cases, we would only be going outside
       our object in cases when an original shift would have been
       undefined.  */
-  if (! spans_byte && GET_CODE (inner) == MEM
+  if (! spans_byte && MEM_P (inner)
        && ((pos_rtx == 0 && pos + len > GET_MODE_BITSIZE (is_mode))
           || (pos_rtx != 0 && len != 1)))
      return 0;
@@ -6202,7 +6355,7 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos,
    /* If this is not from memory, the desired mode is wanted_inner_reg_mode;
       if we have to change the mode of memory and cannot, the desired mode is
       EXTRACTION_MODE.  */
-  if (GET_CODE (inner) != MEM)
+  if (!MEM_P (inner))
      wanted_inner_mode = wanted_inner_reg_mode;
    else if (inner_mode != wanted_inner_mode
            && (mode_dependent_address_p (XEXP (inner, 0))
@@ -6220,7 +6373,7 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos,
          If it's a MEM we need to recompute POS relative to that.
          However, if we're extracting from (or inserting into) a register,
          we want to recompute POS relative to wanted_inner_mode.  */
-      int width = (GET_CODE (inner) == MEM
+      int width = (MEM_P (inner)
                    ? GET_MODE_BITSIZE (is_mode)
                    : GET_MODE_BITSIZE (wanted_inner_mode));
  
@@ -6230,7 +6383,7 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos,
         pos_rtx
           = gen_rtx_MINUS (GET_MODE (pos_rtx), GEN_INT (width - len), pos_rtx);
        /* POS may be less than 0 now, but we check for that below.
-        Note that it can only be less than 0 if GET_CODE (inner) != MEM.  */
+        Note that it can only be less than 0 if !MEM_P (inner).  */
      }
  
    /* If INNER has a wider mode, make it smaller.  If this is a constant
@@ -6238,7 +6391,7 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos,
       the value.  */
    if (wanted_inner_mode != VOIDmode
        && GET_MODE_SIZE (wanted_inner_mode) < GET_MODE_SIZE (is_mode)
-      && ((GET_CODE (inner) == MEM
+      && ((MEM_P (inner)
            && (inner_mode == wanted_inner_mode
                || (! mode_dependent_address_p (XEXP (inner, 0))
                    && ! MEM_VOLATILE_P (inner))))))
@@ -6276,7 +6429,7 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos,
    /* If INNER is not memory, we can always get it into the proper mode.  If we
       are changing its mode, POS must be a constant and smaller than the size
       of the new mode.  */
-  else if (GET_CODE (inner) != MEM)
+  else if (!MEM_P (inner))
      {
        if (GET_MODE (inner) != wanted_inner_mode
           && (pos_rtx != 0
@@ -7618,14 +7771,14 @@ rtx_equal_for_field_assignment_p (rtx x, rtx y)
    /* Check for a paradoxical SUBREG of a MEM compared with the MEM.
       Note that all SUBREGs of MEM are paradoxical; otherwise they
       would have been rewritten.  */
-  if (GET_CODE (x) == MEM && GET_CODE (y) == SUBREG
-      && GET_CODE (SUBREG_REG (y)) == MEM
+  if (MEM_P (x) && GET_CODE (y) == SUBREG
+      && MEM_P (SUBREG_REG (y))
        && rtx_equal_p (SUBREG_REG (y),
                       gen_lowpart (GET_MODE (SUBREG_REG (y)), x)))
      return 1;
  
-  if (GET_CODE (y) == MEM && GET_CODE (x) == SUBREG
-      && GET_CODE (SUBREG_REG (x)) == MEM
+  if (MEM_P (y) && GET_CODE (x) == SUBREG
+      && MEM_P (SUBREG_REG (x))
        && rtx_equal_p (SUBREG_REG (x),
                       gen_lowpart (GET_MODE (SUBREG_REG (x)), y)))
      return 1;
@@ -9195,7 +9348,7 @@ gen_lowpart_for_combine (enum machine_mode mode, rtx x)
    /* X might be a paradoxical (subreg (mem)).  In that case, gen_lowpart
       won't know what to do.  So we will strip off the SUBREG here and
       process normally.  */
-  if (GET_CODE (x) == SUBREG && GET_CODE (SUBREG_REG (x)) == MEM)
+  if (GET_CODE (x) == SUBREG && MEM_P (SUBREG_REG (x)))
      {
        x = SUBREG_REG (x);
        if (GET_MODE (x) == mode)
@@ -9216,7 +9369,7 @@ gen_lowpart_for_combine (enum machine_mode mode, rtx x)
    if (result)
      return result;
  
-  if (GET_CODE (x) == MEM)
+  if (MEM_P (x))
      {
        int offset = 0;
  
@@ -9657,7 +9810,7 @@ simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1)
           break;
  
         case GEU:
-         /* >= C is equivalent to < (C - 1).  */
+         /* >= C is equivalent to > (C - 1).  */
           if (const_op > 1)
             {
               const_op -= 1;
@@ -10746,7 +10899,7 @@ record_dead_and_set_regs_1 (rtx dest, rtx setter, void *data)
        else
         record_value_for_reg (dest, record_dead_insn, NULL_RTX);
      }
-  else if (GET_CODE (dest) == MEM
+  else if (MEM_P (dest)
            /* Ignore pushes, they clobber nothing.  */
            && ! push_operand (dest, GET_MODE (dest)))
      mem_last_set = INSN_CUID (record_dead_insn);
@@ -10935,7 +11088,7 @@ get_last_value_validate (rtx *loc, rtx insn, int tick, int replace)
    /* If this is a memory reference, make sure that there were
       no stores after it that might have clobbered the value.  We don't
       have alias info, so we assume any store invalidates it.  */
-  else if (GET_CODE (x) == MEM && ! RTX_UNCHANGING_P (x)
+  else if (MEM_P (x) && ! RTX_UNCHANGING_P (x)
            && INSN_CUID (insn) <= mem_last_set)
      {
        if (replace)
@@ -11231,7 +11384,7 @@ mark_used_regs_combine (rtx x)
      case CLOBBER:
        /* If we are clobbering a MEM, mark any hard registers inside the
          address as used.  */
-      if (GET_CODE (XEXP (x, 0)) == MEM)
+      if (MEM_P (XEXP (x, 0)))
         mark_used_regs_combine (XEXP (XEXP (x, 0), 0));
        return;
  
@@ -11272,7 +11425,7 @@ mark_used_regs_combine (rtx x)
                || GET_CODE (testreg) == STRICT_LOW_PART)
           testreg = XEXP (testreg, 0);
  
-       if (GET_CODE (testreg) == MEM)
+       if (MEM_P (testreg))
           mark_used_regs_combine (XEXP (testreg, 0));
  
         mark_used_regs_combine (SET_SRC (x));
@@ -11471,7 +11624,7 @@ move_deaths (rtx x, rtx maybe_kill_insn, int from_cuid, rtx to_insn,
          For a REG (the only other possibility), the entire value is
          being replaced so the old value is not used in this insn.  */
  
-      if (GET_CODE (dest) == MEM)
+      if (MEM_P (dest))
         move_deaths (XEXP (dest, 0), maybe_kill_insn, from_cuid,
                      to_insn, pnotes);
        return;
@@ -11886,9 +12039,7 @@ distribute_notes (rtx notes, rtx from_insn, rtx i3, rtx i2)
                           distribute_notes (old_notes, tem, tem, NULL_RTX);
                           distribute_links (LOG_LINKS (tem));
  
-                         PUT_CODE (tem, NOTE);
-                         NOTE_LINE_NUMBER (tem) = NOTE_INSN_DELETED;
-                         NOTE_SOURCE_FILE (tem) = 0;
+                         SET_INSN_DELETED (tem);
  
  #ifdef HAVE_cc0
                           /* Delete the setter too.  */
@@ -11902,10 +12053,7 @@ distribute_notes (rtx notes, rtx from_insn, rtx i3, rtx i2)
                                                 cc0_setter, NULL_RTX);
                               distribute_links (LOG_LINKS (cc0_setter));
  
-                             PUT_CODE (cc0_setter, NOTE);
-                             NOTE_LINE_NUMBER (cc0_setter)
-                               = NOTE_INSN_DELETED;
-                             NOTE_SOURCE_FILE (cc0_setter) = 0;
+                             SET_INSN_DELETED (cc0_setter);
                             }
  #endif
                         }
@@ -12208,7 +12356,7 @@ unmentioned_reg_p_1 (rtx *loc, void *expr)
    rtx x = *loc;
  
    if (x != NULL_RTX
-      && (REG_P (x) || GET_CODE (x) == MEM)
+      && (REG_P (x) || MEM_P (x))
        && ! reg_mentioned_p (x, (rtx) expr))
      return 1;
    return 0;