/* Medium-level subroutines: convert bit-field store and extract
and shifts, multiplies and divides to rtl instructions.
- Copyright (C) 1987, 88, 89, 92, 93, 1994 Free Software Foundation, Inc.
+ Copyright (C) 1987, 88, 89, 92-6, 1997 Free Software Foundation, Inc.
This file is part of GNU CC.
You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING. If not, write to
-the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
#include "config.h"
#ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif
-/* Cost of various pieces of RTL. */
+/* Cost of various pieces of RTL.  Note that some of these are indexed
+   by shift count and some by mode.  */
static int add_cost, negate_cost, zero_cost;
static int shift_cost[MAX_BITS_PER_WORD];
static int shiftadd_cost[MAX_BITS_PER_WORD];
static int shiftsub_cost[MAX_BITS_PER_WORD];
+static int mul_cost[NUM_MACHINE_MODES];
+static int div_cost[NUM_MACHINE_MODES];
+static int mul_widen_cost[NUM_MACHINE_MODES];
+static int mul_highpart_cost[NUM_MACHINE_MODES];
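+
+/* These tables are filled in by init_expmed below and consulted, against
+   a MAX_COST budget, by expand_mult_highpart and expand_divmod when
+   deciding whether a multiply-and-shift sequence beats the direct
+   operation.  */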
void
init_expmed ()
rtx shift_insn, shiftadd_insn, shiftsub_insn;
int dummy;
int m;
+ enum machine_mode mode, wider_mode;
start_sequence ();
  smod_pow2_cheap
    = (rtx_cost (gen_rtx (MOD, word_mode, reg, GEN_INT (32)), SET)
<= 2 * add_cost);
+ for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
+ mode != VOIDmode;
+ mode = GET_MODE_WIDER_MODE (mode))
+ {
+ reg = gen_rtx (REG, mode, 10000);
+ div_cost[(int) mode] = rtx_cost (gen_rtx (UDIV, mode, reg, reg), SET);
+ mul_cost[(int) mode] = rtx_cost (gen_rtx (MULT, mode, reg, reg), SET);
+ wider_mode = GET_MODE_WIDER_MODE (mode);
+ if (wider_mode != VOIDmode)
+ {
+ mul_widen_cost[(int) wider_mode]
+ = rtx_cost (gen_rtx (MULT, wider_mode,
+ gen_rtx (ZERO_EXTEND, wider_mode, reg),
+ gen_rtx (ZERO_EXTEND, wider_mode, reg)),
+ SET);
+ mul_highpart_cost[(int) mode]
+ = rtx_cost (gen_rtx (TRUNCATE, mode,
+ gen_rtx (LSHIFTRT, wider_mode,
+ gen_rtx (MULT, wider_mode,
+ gen_rtx (ZERO_EXTEND, wider_mode, reg),
+ gen_rtx (ZERO_EXTEND, wider_mode, reg)),
+ GEN_INT (GET_MODE_BITSIZE (mode)))),
+ SET);
+ }
+ }
+
/* Free the objects we just allocated. */
end_sequence ();
obfree (free_point);
enum machine_mode mode;
rtx x;
{
- if (GET_CODE (x) == CONST_INT)
- {
- HOST_WIDE_INT val = - INTVAL (x);
- if (GET_MODE_BITSIZE (mode) < HOST_BITS_PER_WIDE_INT)
- {
- /* Sign extend the value from the bits that are significant. */
- if (val & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
- val |= (HOST_WIDE_INT) (-1) << GET_MODE_BITSIZE (mode);
- else
- val &= ((HOST_WIDE_INT) 1 << GET_MODE_BITSIZE (mode)) - 1;
- }
- return GEN_INT (val);
- }
- else
- return expand_unop (GET_MODE (x), neg_optab, x, NULL_RTX, 0);
+ rtx result = simplify_unary_operation (NEG, mode, x, mode);
+
+ if (result == 0)
+ result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
+
+ return result;
}
\f
/* Generate code to store value from rtx VALUE
#ifdef HAVE_insv
if (HAVE_insv
+ && GET_MODE (value) != BLKmode
&& !(bitsize == 1 && GET_CODE (value) == CONST_INT)
/* Ensure insv's size is wide enough for this field. */
&& (GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_insv][3])
int save_volatile_ok = volatile_ok;
volatile_ok = 1;
- /* If this machine's insv can only insert into a register, or if we
- are to force MEMs into a register, copy OP0 into a register and
- save it back later. */
+ /* If this machine's insv can only insert into a register, copy OP0
+ into a register and save it back later. */
+ /* This used to check flag_force_mem, but that was a serious
+ de-optimization now that flag_force_mem is enabled by -O2. */
if (GET_CODE (op0) == MEM
- && (flag_force_mem
- || ! ((*insn_operand_predicate[(int) CODE_FOR_insv][0])
- (op0, VOIDmode))))
+ && ! ((*insn_operand_predicate[(int) CODE_FOR_insv][0])
+ (op0, VOIDmode)))
{
rtx tempreg;
enum machine_mode bestmode;
bestmode = GET_MODE (op0);
if (bestmode == VOIDmode
- || (STRICT_ALIGNMENT && GET_MODE_SIZE (bestmode) > align))
+ || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
goto insv_loses;
/* Adjust address to point to the containing unit of that mode. */
int all_zero = 0;
int all_one = 0;
+ if (! SLOW_UNALIGNED_ACCESS)
+ struct_align = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
+
/* There is a case not handled here:
a structure with a known alignment of just a halfword
and a field split across two aligned halfwords within the structure.
value = word;
else
value = gen_lowpart_common (word_mode,
- force_reg (GET_MODE (value), value));
+ force_reg (GET_MODE (value) != VOIDmode
+ ? GET_MODE (value)
+ : word_mode, value));
}
while (bitsdone < bitsize)
if (BYTES_BIG_ENDIAN)
{
+ int total_bits;
+
+ /* We must do an endian conversion exactly the same way as it is
+ done in extract_bit_field, so that the two calls to
+ extract_fixed_bit_field will have comparable arguments. */
+ if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
+ total_bits = BITS_PER_WORD;
+ else
+ total_bits = GET_MODE_BITSIZE (GET_MODE (value));
+
/* Fetch successively less significant portions. */
if (GET_CODE (value) == CONST_INT)
part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
else
/* The args are chosen so that the last part includes the
lsb. Give extract_bit_field the value it needs (with
- endianness compensation) to fetch the piece we want. */
- part = extract_fixed_bit_field (word_mode, value, 0, thissize,
- GET_MODE_BITSIZE (GET_MODE (value))
- - bitsize + bitsdone,
- NULL_RTX, 1, align);
+ endianness compensation) to fetch the piece we want.
+
+ ??? We have no idea what the alignment of VALUE is, so
+ we have to use a guess. */
+ part
+ = extract_fixed_bit_field
+ (word_mode, value, 0, thissize,
+ total_bits - bitsize + bitsdone, NULL_RTX, 1,
+ GET_MODE (value) == VOIDmode
+ ? UNITS_PER_WORD
+ : (GET_MODE (value) == BLKmode
+ ? 1
+ : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT));
}
else
{
>> bitsdone)
& (((HOST_WIDE_INT) 1 << thissize) - 1));
else
- part = extract_fixed_bit_field (word_mode, value, 0, thissize,
- bitsdone, NULL_RTX, 1, align);
+ part
+ = extract_fixed_bit_field
+ (word_mode, value, 0, thissize, bitsdone, NULL_RTX, 1,
+ GET_MODE (value) == VOIDmode
+ ? UNITS_PER_WORD
+ : (GET_MODE (value) == BLKmode
+ ? 1
+ : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT));
}
/* If OP0 is a register, then handle OFFSET here.
rtx spec_target = target;
rtx spec_target_subreg = 0;
- if (GET_CODE (str_rtx) == MEM && ! MEM_IN_STRUCT_P (str_rtx))
- abort ();
-
/* Discount the part of the structure before the desired byte.
We need to know how many bytes are safe to reference after it. */
if (total_size >= 0)
tmode = mode;
while (GET_CODE (op0) == SUBREG)
{
+ int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
+ int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));
+
offset += SUBREG_WORD (op0);
+
+ if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
+ {
+ bitpos += inner_size - outer_size;
+ if (bitpos > unit)
+ {
+ offset += (bitpos / unit);
+ bitpos %= unit;
+ }
+ }
+
op0 = SUBREG_REG (op0);
}
+
+ /* ??? We currently assume TARGET is at least as big as BITSIZE.
+ If that's wrong, the solution is to test for it and set TARGET to 0
+ if needed. */
/* If OP0 is a register, BITPOS must count within a word.
But as we have it, it counts within whatever size OP0 now has.
So too extracting a subword value in
the least significant part of the register. */
- if ((GET_CODE (op0) == REG
+ if (((GET_CODE (op0) == REG
+ && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
+ GET_MODE_BITSIZE (GET_MODE (op0))))
|| (GET_CODE (op0) == MEM
&& (! SLOW_UNALIGNED_ACCESS
|| (offset * BITS_PER_UNIT % bitsize == 0
if (target == 0 || GET_CODE (target) != REG)
target = gen_reg_rtx (mode);
+ /* Indicate for flow that the entire target reg is being set. */
+ emit_insn (gen_rtx (CLOBBER, VOIDmode, target));
+
for (i = 0; i < nwords; i++)
{
/* If I is 0, use the low-order word in both field and target;
if I is 1, use the next to lowest word; and so on. */
- int wordnum = (WORDS_BIG_ENDIAN ? nwords - i - 1 : i);
+ /* Word number in TARGET to use. */
+ int wordnum = (WORDS_BIG_ENDIAN
+ ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
+ : i);
+ /* Offset from start of field in OP0. */
int bit_offset = (WORDS_BIG_ENDIAN
? MAX (0, bitsize - (i + 1) * BITS_PER_WORD)
: i * BITS_PER_WORD);
}
if (unsignedp)
- return target;
+ {
+ /* Unless we've filled TARGET, the upper regs in a multi-reg value
+ need to be zero'd out. */
+ if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
+ {
+ int i, total_words;
+
+ total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
+ for (i = nwords; i < total_words; i++)
+ {
+ int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
+ rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
+ emit_move_insn (target_part, const0_rtx);
+ }
+ }
+ return target;
+ }
+
/* Signed bit field: sign-extend with two arithmetic shifts. */
target = expand_shift (LSHIFT_EXPR, mode, target,
build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
bestmode = GET_MODE (xop0);
if (bestmode == VOIDmode
- || (STRICT_ALIGNMENT && GET_MODE_SIZE (bestmode) > align))
+ || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
goto extzv_loses;
/* Compute offset as multiple of this unit,
bestmode = GET_MODE (xop0);
if (bestmode == VOIDmode
- || (STRICT_ALIGNMENT && GET_MODE_SIZE (bestmode) > align))
+ || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
goto extv_loses;
/* Compute offset as multiple of this unit,
#ifdef SLOW_ZERO_EXTEND
/* Always generate an `and' if
we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
- will combine fruitfully with the zero-extend. */
+ will combine fruitfully with the zero-extend.  */
|| tmode != mode
#endif
#endif
/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
complement of that if COMPLEMENT. The mask is truncated if
- necessary to the width of mode MODE. */
+ necessary to the width of mode MODE. The mask is zero-extended if
+ BITSIZE+BITPOS is too small for MODE. */
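+
+/* For example: in SImode with BITSIZE == 8 and BITPOS == 4 the mask is
+   0x00000ff0; with COMPLEMENT nonzero it is 0xfffff00f.  */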
static rtx
mask_rtx (mode, bitpos, bitsize, complement)
op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
-#if SHIFT_COUNT_TRUNCATED
+#ifdef SHIFT_COUNT_TRUNCATED
if (SHIFT_COUNT_TRUNCATED
&& GET_CODE (op1) == CONST_INT
&& (unsigned HOST_WIDE_INT) INTVAL (op1) >= GET_MODE_BITSIZE (mode))
continue;
else if (methods == OPTAB_LIB_WIDEN)
{
- /* If we are rotating by a constant that is valid and
- we have been unable to open-code this by a rotation,
+ /* If we have been unable to open-code this by a rotation,
do it as the IOR of two shifts. I.e., to rotate A
by N bits, compute (A << N) | ((unsigned) A >> (C - N))
where C is the bitsize of A.  If the target cannot open-code one of
the two shifts (or the IOR), this could cost two libcalls rather than
the single one a plain shift needs; we accept
this extremely unlikely lossage to avoid complicating the
code below. */
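+ /* For example: in a 32-bit mode, rotating A left by 5 becomes
+    (A << 5) | ((unsigned) A >> 27).  */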
- if (GET_CODE (op1) == CONST_INT && INTVAL (op1) > 0
- && INTVAL (op1) < GET_MODE_BITSIZE (mode))
- {
- rtx subtarget = target == shifted ? 0 : target;
- rtx temp1;
- tree other_amount
- = build_int_2 (GET_MODE_BITSIZE (mode) - INTVAL (op1), 0);
-
- shifted = force_reg (mode, shifted);
-
- temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
- mode, shifted, amount, subtarget, 1);
- temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
- mode, shifted, other_amount, 0, 1);
- return expand_binop (mode, ior_optab, temp, temp1, target,
- unsignedp, methods);
- }
- else
- methods = OPTAB_LIB;
+ rtx subtarget = target == shifted ? 0 : target;
+ rtx temp1;
+ tree type = TREE_TYPE (amount);
+ tree new_amount = make_tree (type, op1);
+ tree other_amount
+ = fold (build (MINUS_EXPR, type,
+ convert (type,
+ build_int_2 (GET_MODE_BITSIZE (mode),
+ 0)),
+ amount));
+
+ shifted = force_reg (mode, shifted);
+
+ temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
+ mode, shifted, new_amount, subtarget, 1);
+ temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
+ mode, shifted, other_amount, 0, 1);
+ return expand_binop (mode, ior_optab, temp, temp1, target,
+ unsignedp, methods);
}
temp = expand_binop (mode,
unsigned HOST_WIDE_INT x;
int n;
{
- /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
+ /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
/* The algorithm notes that the choice y = x satisfies
x*y == 1 mod 2^3, since x is assumed odd.
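+ Each step y := y * (2 - x*y), reduced mod 2^n, doubles the number of
+ correct low-order bits (a standard Hensel/Newton lifting step: if
+ x*y == 1 + k*2^m, then x*(y*(2 - x*y)) == 1 - k^2*2^(2m)).  */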
MODE is the mode of operation and result.
- UNSIGNEDP nonzero means unsigned multiply. */
+ UNSIGNEDP nonzero means unsigned multiply.
+
+ MAX_COST is the total allowed cost for the expanded RTL. */
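+
+/* Illustrative shape: for a 32-bit MODE with a 64-bit wider mode, the
+   "high part" is bits 32..63 of the full product; for unsigned operands
+   this is TRUNCATE (LSHIFTRT (MULT (ZERO_EXTEND op0, ZERO_EXTEND op1), 32)),
+   the same RTL shape costed as mul_highpart_cost in init_expmed above.  */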
rtx
-expand_mult_highpart (mode, op0, cnst1, target, unsignedp)
+expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
enum machine_mode mode;
register rtx op0, target;
unsigned HOST_WIDE_INT cnst1;
int unsignedp;
+ int max_cost;
{
enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
optab mul_highpart_optab;
/* expand_mult handles constant multiplication of word_mode
or narrower. It does a poor job for large modes. */
- if (size < BITS_PER_WORD)
+ if (size < BITS_PER_WORD
+ && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
{
/* We have to do this, since expand_binop doesn't do conversion for
multiply. Maybe change expand_binop to handle widening multiply? */
tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, unsignedp);
tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
build_int_2 (size, 0), NULL_RTX, 1);
- return gen_lowpart (mode, tem);
+ return convert_modes (mode, wider_mode, tem, unsignedp);
}
if (target == 0)
/* Firstly, try using a multiplication insn that only generates the needed
high part of the product, and in the sign flavor of unsignedp. */
- mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
- target = expand_binop (mode, mul_highpart_optab,
- op0, op1, target, unsignedp, OPTAB_DIRECT);
- if (target)
- return target;
+ if (mul_highpart_cost[(int) mode] < max_cost)
+ {
+ mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
+ target = expand_binop (mode, mul_highpart_optab,
+ op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
+ if (target)
+ return target;
+ }
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
Need to adjust the result after the multiplication. */
- mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
- target = expand_binop (mode, mul_highpart_optab,
- op0, op1, target, unsignedp, OPTAB_DIRECT);
- if (target)
- /* We used the wrong signedness. Adjust the result. */
- return expand_mult_highpart_adjust (mode, target, op0,
- op1, target, unsignedp);
-
- /* Thirdly, we try to use a widening multiplication, or a wider mode
- multiplication. */
+ if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
+     < max_cost)
+ {
+ mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
+ target = expand_binop (mode, mul_highpart_optab,
+ op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
+ if (target)
+ /* We used the wrong signedness. Adjust the result. */
+ return expand_mult_highpart_adjust (mode, target, op0,
+ op1, target, unsignedp);
+ }
+ /* Try widening multiplication. */
moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
- if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing)
- ;
- else if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing)
- moptab = smul_optab;
- else
+ if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+ && mul_widen_cost[(int) wider_mode] < max_cost)
+ {
+ op1 = force_reg (mode, op1);
+ goto try;
+ }
+
+ /* Try widening the mode and perform a non-widening multiplication. */
+ moptab = smul_optab;
+ if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+ && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
{
- /* Try widening multiplication of opposite signedness, and adjust. */
- moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
- if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing)
+ op1 = wide_op1;
+ goto try;
+ }
+
+ /* Try widening multiplication of opposite signedness, and adjust. */
+ moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
+ if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+ && (mul_widen_cost[(int) wider_mode]
+ + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
+ {
+ rtx regop1 = force_reg (mode, op1);
+ tem = expand_binop (wider_mode, moptab, op0, regop1,
+ NULL_RTX, ! unsignedp, OPTAB_WIDEN);
+ if (tem != 0)
{
- tem = expand_binop (wider_mode, moptab, op0, wide_op1,
- NULL_RTX, ! unsignedp, OPTAB_WIDEN);
- if (tem != 0)
- {
- /* Extract the high half of the just generated product. */
- tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
- build_int_2 (size, 0), NULL_RTX, 1);
- tem = gen_lowpart (mode, tem);
- /* We used the wrong signedness. Adjust the result. */
- return expand_mult_highpart_adjust (mode, tem, op0, op1,
- target, unsignedp);
- }
+ /* Extract the high half of the just generated product. */
+ tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
+ build_int_2 (size, 0), NULL_RTX, 1);
+ tem = convert_modes (mode, wider_mode, tem, unsignedp);
+ /* We used the wrong signedness. Adjust the result. */
+ return expand_mult_highpart_adjust (mode, tem, op0, op1,
+ target, unsignedp);
}
-
- /* As a last resort, try widening the mode and perform a
- non-widening multiplication. */
- moptab = smul_optab;
}
+ return 0;
+
+ try:
/* Pass NULL_RTX as target since TARGET has wrong mode. */
- tem = expand_binop (wider_mode, moptab, op0, wide_op1,
+ tem = expand_binop (wider_mode, moptab, op0, op1,
NULL_RTX, unsignedp, OPTAB_WIDEN);
if (tem == 0)
return 0;
/* Extract the high half of the just generated product. */
- tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
- build_int_2 (size, 0), NULL_RTX, 1);
- return gen_lowpart (mode, tem);
+ if (mode == word_mode)
+   return gen_highpart (mode, tem);
+ else
+ {
+ tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
+ build_int_2 (size, 0), NULL_RTX, 1);
+ return convert_modes (mode, wider_mode, tem, unsignedp);
+ }
}
\f
/* Emit the code to divide OP0 by OP1, putting the result in TARGET
rtx insn, set;
optab optab1, optab2;
int op1_is_constant, op1_is_pow2;
+ int max_cost, extra_cost;
op1_is_constant = GET_CODE (op1) == CONST_INT;
op1_is_pow2 = (op1_is_constant
size = GET_MODE_BITSIZE (compute_mode);
#if 0
/* It should be possible to restrict the precision to GET_MODE_BITSIZE
- (mode), and thereby get better code when OP1 is a constant. Do that for
- GCC 2.7. It will require going over all usages of SIZE below. */
+ (mode), and thereby get better code when OP1 is a constant. Do that
+ later. It will require going over all usages of SIZE below. */
size = GET_MODE_BITSIZE (mode);
#endif
+ max_cost = div_cost[(int) compute_mode]
+ - (rem_flag ? mul_cost[(int) compute_mode] + add_cost : 0);
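+
+ /* A constant-divisor expansion is only a win if it is cheaper than the
+    direct division; when the remainder is wanted we must also multiply
+    the quotient back and subtract, hence the deduction above.  */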
+
/* Now convert to the best mode to use. */
if (compute_mode != mode)
{
last = get_last_insn ();
- /* Promote floor rouding to trunc rounding for unsigned operations. */
+ /* Promote floor rounding to trunc rounding for unsigned operations. */
if (unsignedp)
{
if (code == FLOOR_DIV_EXPR)
{
case TRUNC_MOD_EXPR:
case TRUNC_DIV_EXPR:
- if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
+ if (op1_is_constant)
{
- if (unsignedp
- || (INTVAL (op1)
- == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (compute_mode) - 1)))
+ if (unsignedp)
{
unsigned HOST_WIDE_INT mh, ml;
int pre_shift, post_shift;
pre_shift = floor_log2 (d);
if (rem_flag)
{
- remainder = expand_binop (compute_mode, and_optab, op0,
- GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
- remainder, 1,
- OPTAB_LIB_WIDEN);
+ remainder
+   = expand_binop (compute_mode, and_optab, op0,
+                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
+                   remainder, 1,
+                   OPTAB_LIB_WIDEN);
if (remainder)
return gen_lowpart (mode, remainder);
}
build_int_2 (pre_shift, 0),
tquotient, 1);
}
- else if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
+ else if (size <= HOST_BITS_PER_WIDE_INT)
{
- /* Most significant bit of divisor is set, emit a scc insn.
- emit_store_flag needs to be passed a place for the
- result. */
- quotient = emit_store_flag (tquotient, GEU, op0, op1,
- compute_mode, 1, 1);
- /* Can emit_store_flag have failed? */
- if (quotient == 0)
- goto fail1;
- }
- else
- {
- /* Find a suitable multiplier and right shift count instead
- of multiplying with D. */
-
- mh = choose_multiplier (d, size, size,
- &ml, &post_shift, &dummy);
-
- /* If the suggested multiplier is more than SIZE bits, we
- can do better for even divisors, using an initial right
- shift. */
- if (mh != 0 && (d & 1) == 0)
- {
- pre_shift = floor_log2 (d & -d);
- mh = choose_multiplier (d >> pre_shift, size,
- size - pre_shift,
- &ml, &post_shift, &dummy);
- if (mh)
- abort ();
- }
- else
- pre_shift = 0;
-
- if (mh != 0)
+ if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
{
- rtx t1, t2, t3, t4;
-
- t1 = expand_mult_highpart (compute_mode, op0, ml,
- NULL_RTX, 1);
- if (t1 == 0)
+ /* Most significant bit of divisor is set; emit an scc
+ insn. */
+ quotient = emit_store_flag (tquotient, GEU, op0, op1,
+ compute_mode, 1, 1);
+ if (quotient == 0)
goto fail1;
- t2 = force_operand (gen_rtx (MINUS, compute_mode,
- op0, t1),
- NULL_RTX);
- t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
- build_int_2 (1, 0), NULL_RTX, 1);
- t4 = force_operand (gen_rtx (PLUS, compute_mode,
- t1, t3),
- NULL_RTX);
- quotient = expand_shift (RSHIFT_EXPR, compute_mode, t4,
- build_int_2 (post_shift - 1,
- 0),
- tquotient, 1);
}
else
{
- rtx t1, t2;
+ /* Find a suitable multiplier and right shift count
+ instead of multiplying with D. */
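+
+ /* Illustrative 32-bit example: for D == 5, choose_multiplier yields
+    ML == 0xcccccccd, POST_SHIFT == 2 and MH == 0, so the quotient is
+    the high half of OP0 * 0xcccccccd, shifted right by 2.  */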
- t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
- build_int_2 (pre_shift, 0),
- NULL_RTX, 1);
- t2 = expand_mult_highpart (compute_mode, t1, ml,
- NULL_RTX, 1);
- if (t2 == 0)
- goto fail1;
- quotient = expand_shift (RSHIFT_EXPR, compute_mode, t2,
- build_int_2 (post_shift, 0),
- tquotient, 1);
+ mh = choose_multiplier (d, size, size,
+ &ml, &post_shift, &dummy);
+
+ /* If the suggested multiplier is more than SIZE bits,
+ we can do better for even divisors, using an
+ initial right shift. */
+ if (mh != 0 && (d & 1) == 0)
+ {
+ pre_shift = floor_log2 (d & -d);
+ mh = choose_multiplier (d >> pre_shift, size,
+ size - pre_shift,
+ &ml, &post_shift, &dummy);
+ if (mh)
+ abort ();
+ }
+ else
+ pre_shift = 0;
+
+ if (mh != 0)
+ {
+ rtx t1, t2, t3, t4;
+
+ extra_cost = (shift_cost[post_shift - 1]
+ + shift_cost[1] + 2 * add_cost);
+ t1 = expand_mult_highpart (compute_mode, op0, ml,
+ NULL_RTX, 1,
+ max_cost - extra_cost);
+ if (t1 == 0)
+ goto fail1;
+ t2 = force_operand (gen_rtx (MINUS, compute_mode,
+ op0, t1),
+ NULL_RTX);
+ t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
+ build_int_2 (1, 0), NULL_RTX, 1);
+ t4 = force_operand (gen_rtx (PLUS, compute_mode,
+ t1, t3),
+ NULL_RTX);
+ quotient
+   = expand_shift (RSHIFT_EXPR, compute_mode, t4,
+                   build_int_2 (post_shift - 1, 0),
+                   tquotient, 1);
+ }
+ else
+ {
+ rtx t1, t2;
+
+ t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
+ build_int_2 (pre_shift, 0),
+ NULL_RTX, 1);
+ extra_cost = (shift_cost[pre_shift]
+ + shift_cost[post_shift]);
+ t2 = expand_mult_highpart (compute_mode, t1, ml,
+ NULL_RTX, 1,
+ max_cost - extra_cost);
+ if (t2 == 0)
+ goto fail1;
+ quotient
+   = expand_shift (RSHIFT_EXPR, compute_mode, t2,
+                   build_int_2 (post_shift, 0),
+                   tquotient, 1);
+ }
}
}
+ else /* Too wide a mode to use the tricky code.  */
+ break;
insn = get_last_insn ();
if (insn != last
else if (d == -1)
quotient = expand_unop (compute_mode, neg_optab, op0,
tquotient, 0);
+ else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
+ {
+ /* This case is not handled correctly below. */
+ quotient = emit_store_flag (tquotient, EQ, op0, op1,
+ compute_mode, 1, 1);
+ if (quotient == 0)
+ goto fail1;
+ }
else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
&& (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
;
quotient, quotient, 0);
}
}
- else
+ else if (size <= HOST_BITS_PER_WIDE_INT)
{
choose_multiplier (abs_d, size, size - 1,
&ml, &post_shift, &lgup);
{
rtx t1, t2, t3;
+ extra_cost = (shift_cost[post_shift]
+ + shift_cost[size - 1] + add_cost);
t1 = expand_mult_highpart (compute_mode, op0, ml,
- NULL_RTX, 0);
+ NULL_RTX, 0,
+ max_cost - extra_cost);
if (t1 == 0)
goto fail1;
t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
rtx t1, t2, t3, t4;
ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
+ extra_cost = (shift_cost[post_shift]
+ + shift_cost[size - 1] + 2 * add_cost);
t1 = expand_mult_highpart (compute_mode, op0, ml,
- NULL_RTX, 0);
+ NULL_RTX, 0,
+ max_cost - extra_cost);
if (t1 == 0)
goto fail1;
t2 = force_operand (gen_rtx (PLUS, compute_mode, t1, op0),
tquotient);
}
}
+ else /* Too wide a mode to use the tricky code.  */
+ break;
insn = get_last_insn ();
if (insn != last
build_int_2 (size - 1, 0), NULL_RTX, 0);
t2 = expand_binop (compute_mode, xor_optab, op0, t1,
NULL_RTX, 0, OPTAB_WIDEN);
+ extra_cost = (shift_cost[post_shift]
+ + shift_cost[size - 1] + 2 * add_cost);
t3 = expand_mult_highpart (compute_mode, t2, ml,
- NULL_RTX, 1);
+ NULL_RTX, 1,
+ max_cost - extra_cost);
if (t3 != 0)
{
t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
or remainder to get floor rounding, once we have the remainder.
Notice that we compute also the final remainder value here,
and return the result right away. */
- if (target == 0)
+ if (target == 0 || GET_MODE (target) != compute_mode)
target = gen_reg_rtx (compute_mode);
+
if (rem_flag)
{
- remainder = target;
+ remainder
+ = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
quotient = gen_reg_rtx (compute_mode);
}
else
{
- quotient = target;
+ quotient
+ = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
remainder = gen_reg_rtx (compute_mode);
}
quotient or remainder to get ceiling rounding, once we have the
remainder. Notice that we compute also the final remainder
value here, and return the result right away. */
- if (target == 0)
+ if (target == 0 || GET_MODE (target) != compute_mode)
target = gen_reg_rtx (compute_mode);
+
if (rem_flag)
{
- remainder = target;
+ remainder = (GET_CODE (target) == REG
+ ? target : gen_reg_rtx (compute_mode));
quotient = gen_reg_rtx (compute_mode);
}
else
{
- quotient = target;
+ quotient = (GET_CODE (target) == REG
+ ? target : gen_reg_rtx (compute_mode));
remainder = gen_reg_rtx (compute_mode);
}
quotient or remainder to get ceiling rounding, once we have the
remainder. Notice that we compute also the final remainder
value here, and return the result right away. */
- if (target == 0)
+ if (target == 0 || GET_MODE (target) != compute_mode)
target = gen_reg_rtx (compute_mode);
if (rem_flag)
{
- remainder = target;
+ remainder = (GET_CODE (target) == REG
+ ? target : gen_reg_rtx (compute_mode));
quotient = gen_reg_rtx (compute_mode);
}
else
{
- quotient = target;
+ quotient = (GET_CODE (target) == REG
+ ? target : gen_reg_rtx (compute_mode));
remainder = gen_reg_rtx (compute_mode);
}
if (quotient == 0)
{
+ if (target && GET_MODE (target) != compute_mode)
+ target = 0;
+
if (rem_flag)
{
/* Try to produce the remainder directly without a library call. */
return gen_lowpart (mode, remainder);
}
- /* Produce the quotient. */
- /* Try a quotient insn, but not a library call. */
- quotient = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
- op0, op1, rem_flag ? NULL_RTX : target,
- unsignedp, OPTAB_WIDEN);
+ /* Produce the quotient. Try a quotient insn, but not a library call.
+ If we have a divmod in this mode, use it in preference to widening
+ the div (for this test we assume it will not fail). Note that optab2
+ is set to the one of the two optabs that the call below will use. */
+ quotient
+ = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
+ op0, op1, rem_flag ? NULL_RTX : target,
+ unsignedp,
+ ((optab2->handlers[(int) compute_mode].insn_code
+ != CODE_FOR_nothing)
+ ? OPTAB_DIRECT : OPTAB_WIDEN));
+
if (quotient == 0)
{
/* No luck there. Try a quotient-and-remainder insn,
if (rem_flag)
{
+ if (target && GET_MODE (target) != compute_mode)
+ target = 0;
+
if (quotient == 0)
/* No divide instruction either. Use library for remainder. */
remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
to perform the operation. It says to use zero-extension.
NORMALIZEP is 1 if we should convert the result to be either zero
- or one one. Normalize is -1 if we should convert the result to be
+ or one.  NORMALIZEP is -1 if we should convert the result to be
either zero or -1. If NORMALIZEP is zero, the result will be left
"raw" out of the scc insn. */
enum machine_mode compare_mode;
enum machine_mode target_mode = GET_MODE (target);
rtx tem;
- rtx last = 0;
+ rtx last = get_last_insn ();
rtx pattern, comparison;
- if (mode == VOIDmode)
- mode = GET_MODE (op0);
-
/* If one operand is constant, make it the second one. Only do this
if the other operand is not constant as well. */
code = swap_condition (code);
}
+ if (mode == VOIDmode)
+ mode = GET_MODE (op0);
+
/* For some comparisons with 1 and -1, we can convert this to
comparisons with zero. This will often produce more opportunities for
- store-flag insns. */
+ store-flag insns.  */
switch (code)
{
&& GET_MODE_CLASS (mode) == MODE_INT
&& (normalizep || STORE_FLAG_VALUE == 1
|| (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
- && (STORE_FLAG_VALUE
+ && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
== (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
{
subtarget = target;
subtarget = 0;
if (code == GE)
- op0 = expand_unop (mode, one_cmpl_optab, op0, subtarget, 0);
+ op0 = expand_unop (mode, one_cmpl_optab, op0,
+ ((STORE_FLAG_VALUE == 1 || normalizep)
+ ? 0 : subtarget), 0);
- if (normalizep || STORE_FLAG_VALUE == 1)
+ if (STORE_FLAG_VALUE == 1 || normalizep)
/* If we are supposed to produce a 0/1 value, we want to do
a logical shift from the sign bit to the low-order bit; for
a -1/0 value, we do an arithmetic shift. */
}
}
- if (last)
- delete_insns_since (last);
+ delete_insns_since (last);
- subtarget = target_mode == mode ? target : 0;
+ /* If expensive optimizations are enabled, use a different pseudo
+    register for each insn, instead of reusing the same pseudo.  This
+    leads to better CSE, but slows down the compiler, since there are
+    more pseudos.  */
+ subtarget = (!flag_expensive_optimizations
+ && (target_mode == mode)) ? target : NULL_RTX;
/* If we reached here, we can't do this with a scc insn. However, there
are some comparisons that can be done directly. For example, if
normalizep = STORE_FLAG_VALUE;
else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
- && (STORE_FLAG_VALUE
+ && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
== (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
;
else
if (tem && normalizep)
tem = expand_shift (RSHIFT_EXPR, mode, tem,
size_int (GET_MODE_BITSIZE (mode) - 1),
- tem, normalizep == 1);
+ subtarget, normalizep == 1);
- if (tem && GET_MODE (tem) != target_mode)
+ if (tem)
{
- convert_move (target, tem, 0);
- tem = target;
+ if (GET_MODE (tem) != target_mode)
+ {
+ convert_move (target, tem, 0);
+ tem = target;
+ }
+ else if (!subtarget)
+ {
+ emit_move_insn (target, tem);
+ tem = target;
+ }
}
-
- if (tem == 0)
+ else
delete_insns_since (last);
return tem;
}
- emit_jump_insn ((*bcc_gen_fctn[(int) code]) (label));
+
+/* Like emit_store_flag, but always succeeds. */
+
+rtx
+emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
+ rtx target;
+ enum rtx_code code;
+ rtx op0, op1;
+ enum machine_mode mode;
+ int unsignedp;
+ int normalizep;
+{
+ rtx tem, label;
+
+ /* First see if emit_store_flag can do the job. */
+ tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
+ if (tem != 0)
+ return tem;
+
+ if (normalizep == 0)
+ normalizep = 1;
+
+ /* If this failed, we have to do this with set/compare/jump/set code. */
+
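+ /* The sequence below sets TARGET to 1, compares OP0 with OP1, and
+    branches over a store of 0 when the condition holds, so TARGET ends
+    up 1 exactly when the comparison is true.  */
+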
+ if (GET_CODE (target) != REG
+ || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
+ target = gen_reg_rtx (GET_MODE (target));
+
emit_move_insn (target, const1_rtx);
+ tem = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
+ if (GET_CODE (tem) == CONST_INT)
+ return tem;
+
+ label = gen_label_rtx ();
+ if (bcc_gen_fctn[(int) code] == 0)
+ abort ();
+
+ emit_jump_insn ((*bcc_gen_fctn[(int) code]) (label));
+ emit_move_insn (target, const0_rtx);
emit_label (label);
return target;