X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fexpmed.c;h=65fb007da57ea969e872d1566f74535e483f15a4;hb=188c68d9c1d94d29fc5d73921b70d64a5163e247;hp=3802eec1391eee104d70b449f80b9823656f916c;hpb=0861b09e4706ebfe6497fe18944081e76a7a70ec;p=pf3gnuchains%2Fgcc-fork.git

diff --git a/gcc/expmed.c b/gcc/expmed.c
index 3802eec1391..65fb007da57 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -1,6 +1,6 @@
 /* Medium-level subroutines: convert bit-field store and extract
    and shifts, multiplies and divides to rtl instructions.
-   Copyright (C) 1987, 88, 89, 92, 93, 1994 Free Software Foundation, Inc.
+   Copyright (C) 1987, 88, 89, 92-6, 1997 Free Software Foundation, Inc.
 
 This file is part of GNU CC.
 
@@ -16,7 +16,8 @@ GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
 along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
 
 
 #include "config.h"
@@ -61,11 +62,16 @@ static int sdiv_pow2_cheap, smod_pow2_cheap;
 #define MAX_BITS_PER_WORD BITS_PER_WORD
 #endif
 
-/* Cost of various pieces of RTL.  */
+/* Cost of various pieces of RTL.  Note that the shift costs are indexed by
+   shift count, and the multiply and divide costs by machine mode.  */
 static int add_cost, negate_cost, zero_cost;
 static int shift_cost[MAX_BITS_PER_WORD];
 static int shiftadd_cost[MAX_BITS_PER_WORD];
 static int shiftsub_cost[MAX_BITS_PER_WORD];
+static int mul_cost[NUM_MACHINE_MODES];
+static int div_cost[NUM_MACHINE_MODES];
+static int mul_widen_cost[NUM_MACHINE_MODES];
+static int mul_highpart_cost[NUM_MACHINE_MODES];
 
 void
 init_expmed ()
@@ -77,6 +83,7 @@ init_expmed ()
   rtx shift_insn, shiftadd_insn, shiftsub_insn;
   int dummy;
   int m;
+  enum machine_mode mode, wider_mode;
 
   start_sequence ();
 
@@ -138,6 +145,32 @@ init_expmed ()
     = (rtx_cost (gen_rtx (MOD, word_mode, reg, GEN_INT (32)), SET)
        <= 2 * add_cost);
 
+  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
+       mode != VOIDmode;
+       mode = GET_MODE_WIDER_MODE (mode))
+    {
+      reg = gen_rtx (REG, mode, 10000);
+      div_cost[(int) mode] = rtx_cost (gen_rtx (UDIV, mode, reg, reg), SET);
+      mul_cost[(int) mode] = rtx_cost (gen_rtx (MULT, mode, reg, reg), SET);
+      wider_mode = GET_MODE_WIDER_MODE (mode);
+      if (wider_mode != VOIDmode)
+	{
+	  mul_widen_cost[(int) wider_mode]
+	    = rtx_cost (gen_rtx (MULT, wider_mode,
+				 gen_rtx (ZERO_EXTEND, wider_mode, reg),
+				 gen_rtx (ZERO_EXTEND, wider_mode, reg)),
+			SET);
+	  mul_highpart_cost[(int) mode]
+	    = rtx_cost (gen_rtx (TRUNCATE, mode,
+				 gen_rtx (LSHIFTRT, wider_mode,
+					  gen_rtx (MULT, wider_mode,
+						   gen_rtx (ZERO_EXTEND, wider_mode, reg),
+						   gen_rtx (ZERO_EXTEND, wider_mode, reg)),
+					  GEN_INT (GET_MODE_BITSIZE (mode)))),
+			SET);
+	}
+    }
+
   /* Free the objects we just allocated.  */
   end_sequence ();
   obfree (free_point);
@@ -152,21 +185,12 @@ negate_rtx (mode, x)
      enum machine_mode mode;
      rtx x;
 {
-  if (GET_CODE (x) == CONST_INT)
-    {
-      HOST_WIDE_INT val = - INTVAL (x);
-      if (GET_MODE_BITSIZE (mode) < HOST_BITS_PER_WIDE_INT)
-	{
-	  /* Sign extend the value from the bits that are significant.  */
-	  if (val & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
-	    val |= (HOST_WIDE_INT) (-1) << GET_MODE_BITSIZE (mode);
-	  else
-	    val &= ((HOST_WIDE_INT) 1 << GET_MODE_BITSIZE (mode)) - 1;
-	}
-      return GEN_INT (val);
-    }
-  else
-    return expand_unop (GET_MODE (x), neg_optab, x, NULL_RTX, 0);
+  rtx result = simplify_unary_operation (NEG, mode, x, mode);
+
+  if (result == 0)
+    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
+
+  return result;
 }
 
 /* Generate code to store value from rtx VALUE
@@ -365,6 +389,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
 
 #ifdef HAVE_insv
   if (HAVE_insv
+      && GET_MODE (value) != BLKmode
       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
       /* Ensure insv's size is wide enough for this field.  */
       && (GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_insv][3])
@@ -384,13 +409,13 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
       int save_volatile_ok = volatile_ok;
       volatile_ok = 1;
 
-      /* If this machine's insv can only insert into a register, or if we
-	 are to force MEMs into a register, copy OP0 into a register and
-	 save it back later.  */
+      /* If this machine's insv can only insert into a register, copy OP0
+	 into a register and save it back later.  */
+      /* This used to check flag_force_mem, but that was a serious
+	 de-optimization now that flag_force_mem is enabled by -O2.  */
       if (GET_CODE (op0) == MEM
-	  && (flag_force_mem
-	      || ! ((*insn_operand_predicate[(int) CODE_FOR_insv][0])
-		    (op0, VOIDmode))))
+	  && ! ((*insn_operand_predicate[(int) CODE_FOR_insv][0])
+		(op0, VOIDmode)))
 	{
 	  rtx tempreg;
 	  enum machine_mode bestmode;
@@ -409,7 +434,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
 	    bestmode = GET_MODE (op0);
 
 	  if (bestmode == VOIDmode
-	      || (STRICT_ALIGNMENT && GET_MODE_SIZE (bestmode) > align))
+	      || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
 	    goto insv_loses;
 
 	  /* Adjust address to point to the containing unit of that mode.  */
@@ -532,6 +557,9 @@ store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
   int all_zero = 0;
   int all_one = 0;
 
+  if (! SLOW_UNALIGNED_ACCESS)
+    struct_align = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
+    
   /* There is a case not handled here:
      a structure with a known alignment of just a halfword
      and a field split across two aligned halfwords within the structure.
@@ -715,7 +743,9 @@ store_split_bit_field (op0, bitsize, bitpos, value, align)
 	value = word;
       else
 	value = gen_lowpart_common (word_mode,
-				    force_reg (GET_MODE (value), value));
+				    force_reg (GET_MODE (value) != VOIDmode
+					       ? GET_MODE (value)
+					       : word_mode, value));
     }
 
   while (bitsdone < bitsize)
@@ -736,6 +766,16 @@ store_split_bit_field (op0, bitsize, bitpos, value, align)
 
       if (BYTES_BIG_ENDIAN)
 	{
+	  int total_bits;
+
+	  /* We must do an endian conversion exactly the same way as it is
+	     done in extract_bit_field, so that the two calls to
+	     extract_fixed_bit_field will have comparable arguments.  */
+	  if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
+	    total_bits = BITS_PER_WORD;
+	  else
+	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));
+
 	  /* Fetch successively less significant portions.  */
 	  if (GET_CODE (value) == CONST_INT)
 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
@@ -744,11 +784,19 @@ store_split_bit_field (op0, bitsize, bitpos, value, align)
 	  else
 	    /* The args are chosen so that the last part includes the
 	       lsb.  Give extract_bit_field the value it needs (with
-	       endianness compensation) to fetch the piece we want.  */
-	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
-					    GET_MODE_BITSIZE (GET_MODE (value))
-					    - bitsize + bitsdone,
-					    NULL_RTX, 1, align);
+	       endianness compensation) to fetch the piece we want.
+
+	       ??? We have no idea what the alignment of VALUE is, so
+	       we have to use a guess.  */
+	    part
+	      = extract_fixed_bit_field
+		(word_mode, value, 0, thissize,
+		 total_bits - bitsize + bitsdone, NULL_RTX, 1,
+		 GET_MODE (value) == VOIDmode
+		 ? UNITS_PER_WORD
+		 : (GET_MODE (value) == BLKmode
+		    ? 1
+		    : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT));
 	}
       else
 	{
@@ -758,8 +806,14 @@ store_split_bit_field (op0, bitsize, bitpos, value, align)
 			     >> bitsdone)
 			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
 	  else
-	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
-					    bitsdone, NULL_RTX, 1, align);
+	    part
+	      = extract_fixed_bit_field
+		(word_mode, value, 0, thissize, bitsdone, NULL_RTX, 1,
+		 GET_MODE (value) == VOIDmode
+		 ? UNITS_PER_WORD
+		 : (GET_MODE (value) == BLKmode
+		    ? 1
+		    : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT));
 	}
 
       /* If OP0 is a register, then handle OFFSET here.
@@ -831,9 +885,6 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
   rtx spec_target = target;
   rtx spec_target_subreg = 0;
 
-  if (GET_CODE (str_rtx) == MEM && ! MEM_IN_STRUCT_P (str_rtx))
-    abort ();
-
   /* Discount the part of the structure before the desired byte.
      We need to know how many bytes are safe to reference after it.  */
   if (total_size >= 0)
@@ -844,9 +895,27 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
     tmode = mode;
   while (GET_CODE (op0) == SUBREG)
     {
+      int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
+      int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));
+
       offset += SUBREG_WORD (op0);
+
+      if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
+	{
+	  bitpos += inner_size - outer_size;
+	  if (bitpos > unit)
+	    {
+	      offset += (bitpos / unit);
+	      bitpos %= unit;
+	    }
+	}
+
       op0 = SUBREG_REG (op0);
     }
+
+  /* ??? We currently assume TARGET is at least as big as BITSIZE.
+     If that's wrong, the solution is to test for it and set TARGET to 0
+     if needed.  */
   
   /* If OP0 is a register, BITPOS must count within a word.
      But as we have it, it counts within whatever size OP0 now has.
@@ -862,7 +931,9 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
      So too extracting a subword value in
      the least significant part of the register.  */
 
-  if ((GET_CODE (op0) == REG
+  if (((GET_CODE (op0) == REG
+	&& TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
+				  GET_MODE_BITSIZE (GET_MODE (op0))))
        || (GET_CODE (op0) == MEM
 	   && (! SLOW_UNALIGNED_ACCESS
 	       || (offset * BITS_PER_UNIT % bitsize == 0
@@ -905,11 +976,18 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
       if (target == 0 || GET_CODE (target) != REG)
 	target = gen_reg_rtx (mode);
 
+      /* Indicate for flow that the entire target reg is being set.  */
+      emit_insn (gen_rtx (CLOBBER, VOIDmode, target));
+
       for (i = 0; i < nwords; i++)
 	{
 	  /* If I is 0, use the low-order word in both field and target;
 	     if I is 1, use the next to lowest word; and so on.  */
-	  int wordnum = (WORDS_BIG_ENDIAN ? nwords - i - 1 : i);
+	  /* Word number in TARGET to use.  */
+	  int wordnum = (WORDS_BIG_ENDIAN
+			 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
+			 : i);
+	  /* Offset from start of field in OP0.  */
 	  int bit_offset = (WORDS_BIG_ENDIAN
 			    ? MAX (0, bitsize - (i + 1) * BITS_PER_WORD)
 			    : i * BITS_PER_WORD);
@@ -929,7 +1007,24 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
 	}
 
       if (unsignedp)
-	return target;
+	{
+	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
+	     need to be zeroed out.  */
+	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
+	    {
+	      int i,total_words;
+
+	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
+	      for (i = nwords; i < total_words; i++)
+		{
+		  int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
+		  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
+		  emit_move_insn (target_part, const0_rtx);
+		}
+	    }
+	  return target;
+	}
+
       /* Signed bit field: sign-extend with two arithmetic shifts.  */
       target = expand_shift (LSHIFT_EXPR, mode, target,
 			     build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
@@ -1012,7 +1107,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
 		    bestmode = GET_MODE (xop0);
 
 		  if (bestmode == VOIDmode
-		      || (STRICT_ALIGNMENT && GET_MODE_SIZE (bestmode) > align))
+		      || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
 		    goto extzv_loses;
 
 		  /* Compute offset as multiple of this unit,
@@ -1148,7 +1243,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
 		    bestmode = GET_MODE (xop0);
 
 		  if (bestmode == VOIDmode
-		      || (STRICT_ALIGNMENT && GET_MODE_SIZE (bestmode) > align))
+		      || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
 		    goto extv_loses;
 
 		  /* Compute offset as multiple of this unit,
@@ -1378,7 +1473,7 @@ extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
 #ifdef SLOW_ZERO_EXTEND
 	  /* Always generate an `and' if
 	     we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
-	     will combine fruitfully with the zero-extend. */
+	     will combine fruitfully with the zero-extend.  */
 	  || tmode != mode
 #endif
 #endif
@@ -1424,7 +1519,8 @@ extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
    complement of that if COMPLEMENT.  The mask is truncated if
-   necessary to the width of mode MODE.  */
+   necessary to the width of mode MODE.  The mask is zero-extended if
+   BITSIZE+BITPOS is too small for MODE.  */
 
 static rtx
 mask_rtx (mode, bitpos, bitsize, complement)
@@ -1653,7 +1749,7 @@ expand_shift (code, mode, shifted, amount, target, unsignedp)
 
   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
 
-#if SHIFT_COUNT_TRUNCATED
+#ifdef SHIFT_COUNT_TRUNCATED
   if (SHIFT_COUNT_TRUNCATED
       && GET_CODE (op1) == CONST_INT
       && (unsigned HOST_WIDE_INT) INTVAL (op1) >= GET_MODE_BITSIZE (mode))
@@ -1682,8 +1778,7 @@ expand_shift (code, mode, shifted, amount, target, unsignedp)
 	    continue;
 	  else if (methods == OPTAB_LIB_WIDEN)
 	    {
-	      /* If we are rotating by a constant that is valid and
-		 we have been unable to open-code this by a rotation,
+	      /* If we have been unable to open-code this by a rotation,
 		 do it as the IOR of two shifts.  I.e., to rotate A
 		 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
 		 where C is the bitsize of A.
@@ -1695,25 +1790,25 @@ expand_shift (code, mode, shifted, amount, target, unsignedp)
 		 this extremely unlikely lossage to avoid complicating the
 		 code below.  */
 
-	      if (GET_CODE (op1) == CONST_INT && INTVAL (op1) > 0
-		  && INTVAL (op1) < GET_MODE_BITSIZE (mode))
-		{
-		  rtx subtarget = target == shifted ? 0 : target;
-		  rtx temp1;
-		  tree other_amount
-		    = build_int_2 (GET_MODE_BITSIZE (mode) - INTVAL (op1), 0);
-
-		  shifted = force_reg (mode, shifted);
-
-		  temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
-				       mode, shifted, amount, subtarget, 1);
-		  temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
-					mode, shifted, other_amount, 0, 1);
-		  return expand_binop (mode, ior_optab, temp, temp1, target,
-				       unsignedp, methods);
-		}
-	      else
-		methods = OPTAB_LIB;
+	      rtx subtarget = target == shifted ? 0 : target;
+	      rtx temp1;
+	      tree type = TREE_TYPE (amount);
+	      tree new_amount = make_tree (type, op1);
+	      tree other_amount
+		= fold (build (MINUS_EXPR, type,
+			       convert (type,
+					build_int_2 (GET_MODE_BITSIZE (mode),
+						     0)),
+			       amount));
+
+	      shifted = force_reg (mode, shifted);
+
+	      temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
+				   mode, shifted, new_amount, subtarget, 1);
+	      temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
+				    mode, shifted, other_amount, 0, 1);
+	      return expand_binop (mode, ior_optab, temp, temp1, target,
+				   unsignedp, methods);
 	    }
 
 	  temp = expand_binop (mode,
@@ -2382,7 +2477,7 @@ invert_mod2n (x, n)
      unsigned HOST_WIDE_INT x;
      int n;
 {
-  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y. */
+  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
 
   /* The algorithm notes that the choice y = x satisfies
      x*y == 1 mod 2^3, since x is assumed odd.
@@ -2445,14 +2540,17 @@ expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
 
    MODE is the mode of operation and result.
 
-   UNSIGNEDP nonzero means unsigned multiply.  */
+   UNSIGNEDP nonzero means unsigned multiply.
+
+   MAX_COST is the total allowed cost for the expanded RTL.  */
 
 rtx
-expand_mult_highpart (mode, op0, cnst1, target, unsignedp)
+expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
      enum machine_mode mode;
      register rtx op0, target;
      unsigned HOST_WIDE_INT cnst1;
      int unsignedp;
+     int max_cost;
 {
   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
   optab mul_highpart_optab;
@@ -2479,7 +2577,8 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp)
 
   /* expand_mult handles constant multiplication of word_mode
      or narrower.  It does a poor job for large modes.  */
-  if (size < BITS_PER_WORD)
+  if (size < BITS_PER_WORD
+      && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
     {
       /* We have to do this, since expand_binop doesn't do conversion for
 	 multiply.  Maybe change expand_binop to handle widening multiply?  */
@@ -2488,7 +2587,7 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp)
       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, unsignedp);
       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
 			  build_int_2 (size, 0), NULL_RTX, 1);
-      return gen_lowpart (mode, tem);
+      return convert_modes (mode, wider_mode, tem, unsignedp);
     }
 
   if (target == 0)
@@ -2496,65 +2595,87 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp)
 
   /* Firstly, try using a multiplication insn that only generates the needed
      high part of the product, and in the sign flavor of unsignedp.  */
-  mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
-  target = expand_binop (mode, mul_highpart_optab,
-			 op0, op1, target, unsignedp, OPTAB_DIRECT);
-  if (target)
-    return target;
+  if (mul_highpart_cost[(int) mode] < max_cost)
+    {
+      mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
+      target = expand_binop (mode, mul_highpart_optab,
+			     op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
+      if (target)
+	return target;
+    }
 
   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
      Need to adjust the result after the multiplication.  */
-  mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
-  target = expand_binop (mode, mul_highpart_optab,
-			 op0, op1, target, unsignedp, OPTAB_DIRECT);
-  if (target)
-    /* We used the wrong signedness.  Adjust the result.  */
-    return expand_mult_highpart_adjust (mode, target, op0,
-					op1, target, unsignedp);
-
-  /* Thirdly, we try to use a widening multiplication, or a wider mode
-     multiplication.  */
+  if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)
+    {
+      mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
+      target = expand_binop (mode, mul_highpart_optab,
+			     op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
+      if (target)
+	/* We used the wrong signedness.  Adjust the result.  */
+	return expand_mult_highpart_adjust (mode, target, op0,
+					    op1, target, unsignedp);
+    }
 
+  /* Try widening multiplication.  */
   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
-  if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing)
-    ;
-  else if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing)
-    moptab = smul_optab;
-  else
+  if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+      && mul_widen_cost[(int) wider_mode] < max_cost)
+    {
+      op1 = force_reg (mode, op1);
+      goto try;
+    } 
+
+  /* Try widening the mode and perform a non-widening multiplication.  */
+  moptab = smul_optab;
+  if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+      && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
     {
-      /* Try widening multiplication of opposite signedness, and adjust.  */
-      moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
-      if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing)
+      op1 = wide_op1;
+      goto try;
+    }
+
+  /* Try widening multiplication of opposite signedness, and adjust.  */
+  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
+  if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+      && (mul_widen_cost[(int) wider_mode]
+	  + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
+    {
+      rtx regop1 = force_reg (mode, op1);
+      tem = expand_binop (wider_mode, moptab, op0, regop1,
+			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
+      if (tem != 0)
 	{
-	  tem = expand_binop (wider_mode, moptab, op0, wide_op1,
-			      NULL_RTX, ! unsignedp, OPTAB_WIDEN);
-	  if (tem != 0)
-	    {
-	      /* Extract the high half of the just generated product.  */
-	      tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
-				  build_int_2 (size, 0), NULL_RTX, 1);
-	      tem = gen_lowpart (mode, tem);
-	      /* We used the wrong signedness.  Adjust the result.  */
-	      return expand_mult_highpart_adjust (mode, tem, op0, op1,
-						  target, unsignedp);
-	    }
+	  /* Extract the high half of the just generated product.  */
+	  tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
+			      build_int_2 (size, 0), NULL_RTX, 1);
+	  tem = convert_modes (mode, wider_mode, tem, unsignedp);
+	  /* We used the wrong signedness.  Adjust the result.  */
+	  return expand_mult_highpart_adjust (mode, tem, op0, op1,
+					      target, unsignedp);
 	}
-
-      /* As a last resort, try widening the mode and perform a
-	 non-widening multiplication.  */
-      moptab = smul_optab;
     }
 
+  return 0;
+
+ try:
   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
-  tem = expand_binop (wider_mode, moptab, op0, wide_op1,
+  tem = expand_binop (wider_mode, moptab, op0, op1,
 		      NULL_RTX, unsignedp, OPTAB_WIDEN);
   if (tem == 0)
     return 0;
 
   /* Extract the high half of the just generated product.  */
-  tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
-		      build_int_2 (size, 0), NULL_RTX, 1);
-  return gen_lowpart (mode, tem);
+  if (mode == word_mode)
+    {
+      return gen_highpart (mode, tem);
+    }
+  else
+    {
+      tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
+			  build_int_2 (size, 0), NULL_RTX, 1);
+      return convert_modes (mode, wider_mode, tem, unsignedp);
+    }
 }
 
 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
@@ -2592,6 +2713,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
   rtx insn, set;
   optab optab1, optab2;
   int op1_is_constant, op1_is_pow2;
+  int max_cost, extra_cost;
 
   op1_is_constant = GET_CODE (op1) == CONST_INT;
   op1_is_pow2 = (op1_is_constant
@@ -2696,11 +2818,14 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
   size = GET_MODE_BITSIZE (compute_mode);
 #if 0
   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
-     (mode), and thereby get better code when OP1 is a constant.  Do that for
-     GCC 2.7.  It will require going over all usages of SIZE below.  */
+     (mode), and thereby get better code when OP1 is a constant.  Do that
+     later.  It will require going over all usages of SIZE below.  */
   size = GET_MODE_BITSIZE (mode);
 #endif
 
+  max_cost = div_cost[(int) compute_mode]
+    - (rem_flag ? mul_cost[(int) compute_mode] + add_cost : 0);
+
   /* Now convert to the best mode to use.  */
   if (compute_mode != mode)
     {
@@ -2722,7 +2847,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
 
   last = get_last_insn ();
 
-  /* Promote floor rouding to trunc rounding for unsigned operations.  */
+  /* Promote floor rounding to trunc rounding for unsigned operations.  */
   if (unsignedp)
     {
       if (code == FLOOR_DIV_EXPR)
@@ -2736,11 +2861,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
       {
       case TRUNC_MOD_EXPR:
       case TRUNC_DIV_EXPR:
-	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
+	if (op1_is_constant)
 	  {
-	    if (unsignedp
-		|| (INTVAL (op1)
-		    == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (compute_mode) - 1)))
+	    if (unsignedp)
 	      {
 		unsigned HOST_WIDE_INT mh, ml;
 		int pre_shift, post_shift;
@@ -2752,10 +2875,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
 		    pre_shift = floor_log2 (d);
 		    if (rem_flag)
 		      {
-			remainder = expand_binop (compute_mode, and_optab, op0,
-						  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
-						  remainder, 1,
-						  OPTAB_LIB_WIDEN);
+			remainder =
+			  expand_binop (compute_mode, and_optab, op0,
+					GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
+					remainder, 1,
+					OPTAB_LIB_WIDEN);
 			if (remainder)
 			  return gen_lowpart (mode, remainder);
 		      }
@@ -2763,77 +2887,87 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
 					     build_int_2 (pre_shift, 0),
 					     tquotient, 1);
 		  }
-		else if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
+		else if (size <= HOST_BITS_PER_WIDE_INT)
 		  {
-		    /* Most significant bit of divisor is set, emit a scc insn.
-		       emit_store_flag needs to be passed a place for the
-		       result.  */
-		    quotient = emit_store_flag (tquotient, GEU, op0, op1,
-						compute_mode, 1, 1);
-		    /* Can emit_store_flag have failed? */
-		    if (quotient == 0)
-		      goto fail1;
-		  }
-		else
-		  {
-		    /* Find a suitable multiplier and right shift count instead
-		       of multiplying with D.  */
-
-		    mh = choose_multiplier (d, size, size,
-					    &ml, &post_shift, &dummy);
-
-		    /* If the suggested multiplier is more than SIZE bits, we
-		       can do better for even divisors, using an initial right
-		       shift.  */
-		    if (mh != 0 && (d & 1) == 0)
-		      {
-			pre_shift = floor_log2 (d & -d);
-			mh = choose_multiplier (d >> pre_shift, size,
-						size - pre_shift,
-						&ml, &post_shift, &dummy);
-			if (mh)
-			  abort ();
-		      }
-		    else
-		      pre_shift = 0;
-
-		    if (mh != 0)
+		    if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
 		      {
-			rtx t1, t2, t3, t4;
-
-			t1 = expand_mult_highpart (compute_mode, op0, ml,
-						   NULL_RTX, 1);
-			if (t1 == 0)
+			/* Most significant bit of divisor is set; emit an scc
+			   insn.  */
+			quotient = emit_store_flag (tquotient, GEU, op0, op1,
+						    compute_mode, 1, 1);
+			if (quotient == 0)
 			  goto fail1;
-			t2 = force_operand (gen_rtx (MINUS, compute_mode,
-						     op0, t1),
-					    NULL_RTX);
-			t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
-					   build_int_2 (1, 0), NULL_RTX, 1);
-			t4 = force_operand (gen_rtx (PLUS, compute_mode,
-						     t1, t3),
-					    NULL_RTX);
-			quotient = expand_shift (RSHIFT_EXPR, compute_mode, t4,
-						 build_int_2 (post_shift - 1,
-							      0),
-						 tquotient, 1);
 		      }
 		    else
 		      {
-			rtx t1, t2;
+			/* Find a suitable multiplier and right shift count
+			   instead of multiplying with D.  */
 
-			t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
-					   build_int_2 (pre_shift, 0),
-					   NULL_RTX, 1);
-			t2 = expand_mult_highpart (compute_mode, t1, ml,
-						   NULL_RTX, 1);
-			if (t2 == 0)
-			  goto fail1;
-			quotient = expand_shift (RSHIFT_EXPR, compute_mode, t2,
-						 build_int_2 (post_shift, 0),
-						 tquotient, 1);
+			mh = choose_multiplier (d, size, size,
+						&ml, &post_shift, &dummy);
+
+			/* If the suggested multiplier is more than SIZE bits,
+			   we can do better for even divisors, using an
+			   initial right shift.  */
+			if (mh != 0 && (d & 1) == 0)
+			  {
+			    pre_shift = floor_log2 (d & -d);
+			    mh = choose_multiplier (d >> pre_shift, size,
+						    size - pre_shift,
+						    &ml, &post_shift, &dummy);
+			    if (mh)
+			      abort ();
+			  }
+			else
+			  pre_shift = 0;
+
+			if (mh != 0)
+			  {
+			    rtx t1, t2, t3, t4;
+
+			    extra_cost = (shift_cost[post_shift - 1]
+					  + shift_cost[1] + 2 * add_cost);
+			    t1 = expand_mult_highpart (compute_mode, op0, ml,
+						       NULL_RTX, 1,
+						       max_cost - extra_cost);
+			    if (t1 == 0)
+			      goto fail1;
+			    t2 = force_operand (gen_rtx (MINUS, compute_mode,
+							 op0, t1),
+						NULL_RTX);
+			    t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
+					       build_int_2 (1, 0), NULL_RTX,1);
+			    t4 = force_operand (gen_rtx (PLUS, compute_mode,
+							 t1, t3),
+						NULL_RTX);
+			    quotient =
+			      expand_shift (RSHIFT_EXPR, compute_mode, t4,
+					    build_int_2 (post_shift - 1, 0),
+					    tquotient, 1);
+			  }
+			else
+			  {
+			    rtx t1, t2;
+
+			    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
+					       build_int_2 (pre_shift, 0),
+					       NULL_RTX, 1);
+			    extra_cost = (shift_cost[pre_shift]
+					  + shift_cost[post_shift]);
+			    t2 = expand_mult_highpart (compute_mode, t1, ml,
+						       NULL_RTX, 1,
+						       max_cost - extra_cost);
+			    if (t2 == 0)
+			      goto fail1;
+			    quotient =
+			      expand_shift (RSHIFT_EXPR, compute_mode, t2,
+					    build_int_2 (post_shift, 0),
+					    tquotient, 1);
+			  }
 		      }
 		  }
+		else		/* Too wide mode to use tricky code */
+		  break;
 
 		insn = get_last_insn ();
 		if (insn != last
@@ -2863,6 +2997,14 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
 		else if (d == -1)
 		  quotient = expand_unop (compute_mode, neg_optab, op0,
 					  tquotient, 0);
+		else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
+		  {
+		    /* This case is not handled correctly below.  */
+		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
+						compute_mode, 1, 1);
+		    if (quotient == 0)
+		      goto fail1;
+		  }
 		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
 			 && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
 		  ;
@@ -2919,7 +3061,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
 						quotient, quotient, 0);
 		      }
 		  }
-		else
+		else if (size <= HOST_BITS_PER_WIDE_INT)
 		  {
 		    choose_multiplier (abs_d, size, size - 1,
 				       &ml, &post_shift, &lgup);
@@ -2927,8 +3069,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
 		      {
 			rtx t1, t2, t3;
 
+			extra_cost = (shift_cost[post_shift]
+				      + shift_cost[size - 1] + add_cost);
 			t1 = expand_mult_highpart (compute_mode, op0, ml,
-						   NULL_RTX, 0);
+						   NULL_RTX, 0,
+						   max_cost - extra_cost);
 			if (t1 == 0)
 			  goto fail1;
 			t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
@@ -2947,8 +3092,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
 			rtx t1, t2, t3, t4;
 
 			ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
+			extra_cost = (shift_cost[post_shift]
+				      + shift_cost[size - 1] + 2 * add_cost);
 			t1 = expand_mult_highpart (compute_mode, op0, ml,
-						   NULL_RTX, 0);
+						   NULL_RTX, 0,
+						   max_cost - extra_cost);
 			if (t1 == 0)
 			  goto fail1;
 			t2 = force_operand (gen_rtx (PLUS, compute_mode, t1, op0),
@@ -2965,6 +3113,8 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
 						    tquotient);
 		      }
 		  }
+		else		/* Too wide mode to use tricky code */
+		  break;
 
 		insn = get_last_insn ();
 		if (insn != last
@@ -3022,8 +3172,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
 				       build_int_2 (size - 1, 0), NULL_RTX, 0);
 		    t2 = expand_binop (compute_mode, xor_optab, op0, t1,
 				       NULL_RTX, 0, OPTAB_WIDEN);
+		    extra_cost = (shift_cost[post_shift]
+				  + shift_cost[size - 1] + 2 * add_cost);
 		    t3 = expand_mult_highpart (compute_mode, t2, ml,
-					       NULL_RTX, 1);
+					       NULL_RTX, 1,
+					       max_cost - extra_cost);
 		    if (t3 != 0)
 		      {
 			t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
@@ -3069,16 +3222,19 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
 	   or remainder to get floor rounding, once we have the remainder.
 	   Notice that we compute also the final remainder value here,
 	   and return the result right away.  */
-	if (target == 0)
+	if (target == 0 || GET_MODE (target) != compute_mode)
 	  target = gen_reg_rtx (compute_mode);
+
 	if (rem_flag)
 	  {
-	    remainder = target;
+	    remainder
+	      = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
 	    quotient = gen_reg_rtx (compute_mode);
 	  }
 	else
 	  {
-	    quotient = target;
+	    quotient
+	      = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
 	    remainder = gen_reg_rtx (compute_mode);
 	  }
 
@@ -3193,16 +3349,19 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
 	       quotient or remainder to get ceiling rounding, once we have the
 	       remainder.  Notice that we compute also the final remainder
 	       value here, and return the result right away.  */
-	    if (target == 0)
+	    if (target == 0 || GET_MODE (target) != compute_mode)
 	      target = gen_reg_rtx (compute_mode);
+
 	    if (rem_flag)
 	      {
-		remainder = target;
+		remainder = (GET_CODE (target) == REG
+			     ? target : gen_reg_rtx (compute_mode));
 		quotient = gen_reg_rtx (compute_mode);
 	      }
 	    else
 	      {
-		quotient = target;
+		quotient = (GET_CODE (target) == REG
+			    ? target : gen_reg_rtx (compute_mode));
 		remainder = gen_reg_rtx (compute_mode);
 	      }
 
@@ -3292,16 +3451,18 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
 	       quotient or remainder to get ceiling rounding, once we have the
 	       remainder.  Notice that we compute also the final remainder
 	       value here, and return the result right away.  */
-	    if (target == 0)
+	    if (target == 0 || GET_MODE (target) != compute_mode)
 	      target = gen_reg_rtx (compute_mode);
 	    if (rem_flag)
 	      {
-		remainder = target;
+		remainder= (GET_CODE (target) == REG
+			    ? target : gen_reg_rtx (compute_mode));
 		quotient = gen_reg_rtx (compute_mode);
 	      }
 	    else
 	      {
-		quotient = target;
+		quotient = (GET_CODE (target) == REG
+			    ? target : gen_reg_rtx (compute_mode));
 		remainder = gen_reg_rtx (compute_mode);
 	      }
 
@@ -3474,6 +3635,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
 
   if (quotient == 0)
     {
+      if (target && GET_MODE (target) != compute_mode)
+	target = 0;
+
       if (rem_flag)
 	{
 	  /* Try to produce the remainder directly without a library call.  */
@@ -3497,11 +3661,18 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
 	    return gen_lowpart (mode, remainder);
 	}
 
-      /* Produce the quotient.  */
-      /* Try a quotient insn, but not a library call.  */
-      quotient = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
-				    op0, op1, rem_flag ? NULL_RTX : target,
-				    unsignedp, OPTAB_WIDEN);
+      /* Produce the quotient.  Try a quotient insn, but not a library call.
+	 If we have a divmod in this mode, use it in preference to widening
+	 the div (for this test we assume it will not fail).  Note that optab2
+	 is set to the one of the two optabs that the call below will use.  */
+      quotient
+	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
+			     op0, op1, rem_flag ? NULL_RTX : target,
+			     unsignedp,
+			     ((optab2->handlers[(int) compute_mode].insn_code
+			       != CODE_FOR_nothing)
+			      ? OPTAB_DIRECT : OPTAB_WIDEN));
+
       if (quotient == 0)
 	{
 	  /* No luck there.  Try a quotient-and-remainder insn,
@@ -3525,6 +3696,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
 
   if (rem_flag)
     {
+      if (target && GET_MODE (target) != compute_mode)
+	target = 0;
+
       if (quotient == 0)
 	/* No divide instruction either.  Use library for remainder.  */
 	remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
@@ -3707,7 +3881,7 @@ expand_and (op0, op1, target)
    to perform the operation.  It says to use zero-extension.
 
    NORMALIZEP is 1 if we should convert the result to be either zero
-   or one one.  Normalize is -1 if we should convert the result to be
+   or one.  Normalize is -1 if we should convert the result to be
    either zero or -1.  If NORMALIZEP is zero, the result will be left
    "raw" out of the scc insn.  */
 
@@ -3725,12 +3899,9 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
   enum machine_mode compare_mode;
   enum machine_mode target_mode = GET_MODE (target);
   rtx tem;
-  rtx last = 0;
+  rtx last = get_last_insn ();
   rtx pattern, comparison;
 
-  if (mode == VOIDmode)
-    mode = GET_MODE (op0);
-
   /* If one operand is constant, make it the second one.  Only do this
      if the other operand is not constant as well.  */
 
@@ -3743,9 +3914,12 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
       code = swap_condition (code);
     }
 
+  if (mode == VOIDmode)
+    mode = GET_MODE (op0);
+
   /* For some comparisons with 1 and -1, we can convert this to 
      comparisons with zero.  This will often produce more opportunities for
-     store-flag insns. */
+     store-flag insns.  */
 
   switch (code)
     {
@@ -3784,7 +3958,7 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
       && GET_MODE_CLASS (mode) == MODE_INT
       && (normalizep || STORE_FLAG_VALUE == 1
 	  || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
-	      && (STORE_FLAG_VALUE 
+	      && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
 		  == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
     {
       subtarget = target;
@@ -3803,9 +3977,11 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
 	subtarget = 0;
 
       if (code == GE)
-	op0 = expand_unop (mode, one_cmpl_optab, op0, subtarget, 0);
+	op0 = expand_unop (mode, one_cmpl_optab, op0,
+			   ((STORE_FLAG_VALUE == 1 || normalizep)
+			    ? 0 : subtarget), 0);
 
-      if (normalizep || STORE_FLAG_VALUE == 1)
+      if (STORE_FLAG_VALUE == 1 || normalizep)
 	/* If we are supposed to produce a 0/1 value, we want to do
 	   a logical shift from the sign bit to the low-order bit; for
 	   a -1/0 value, we do an arithmetic shift.  */
@@ -3923,10 +4099,13 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
 	}
     }
 
-  if (last)
-    delete_insns_since (last);
+  delete_insns_since (last);
 
-  subtarget = target_mode == mode ? target : 0;
+  /* If doing expensive optimizations, use a different pseudo register for
+     each insn, instead of reusing the same pseudo.  This leads to better
+     CSE, but slows down the compiler, since there are more pseudos.  */
+  subtarget = (!flag_expensive_optimizations
+	       && (target_mode == mode)) ? target : NULL_RTX;
 
   /* If we reached here, we can't do this with a scc insn.  However, there
      are some comparisons that can be done directly.  For example, if
@@ -3973,7 +4152,7 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
 	normalizep = STORE_FLAG_VALUE;
 
       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
-	       && (STORE_FLAG_VALUE
+	       && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
 		   == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
 	;
       else
@@ -4071,21 +4250,65 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
   if (tem && normalizep)
     tem = expand_shift (RSHIFT_EXPR, mode, tem,
 			size_int (GET_MODE_BITSIZE (mode) - 1),
-			tem, normalizep == 1);
+			subtarget, normalizep == 1);
 
-  if (tem && GET_MODE (tem) != target_mode)
+  if (tem)
     {
-      convert_move (target, tem, 0);
-      tem = target;
+      if (GET_MODE (tem) != target_mode)
+	{
+	  convert_move (target, tem, 0);
+	  tem = target;
+	}
+      else if (!subtarget)
+	{
+	  emit_move_insn (target, tem);
+	  tem = target;
+	}
     }
-
-  if (tem == 0)
+  else
     delete_insns_since (last);
 
   return tem;
 }
-  emit_jump_insn ((*bcc_gen_fctn[(int) code]) (label));
+
+/* Like emit_store_flag, but always succeeds.  */
+
+rtx
+emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
+     rtx target;
+     enum rtx_code code;
+     rtx op0, op1;
+     enum machine_mode mode;
+     int unsignedp;
+     int normalizep;
+{
+  rtx tem, label;
+
+  /* First see if emit_store_flag can do the job.  */
+  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
+  if (tem != 0)
+    return tem;
+
+  if (normalizep == 0)
+    normalizep = 1;
+
+  /* If this failed, we have to do this with set/compare/jump/set code.  */
+
+  if (GET_CODE (target) != REG
+      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
+    target = gen_reg_rtx (GET_MODE (target));
+
   emit_move_insn (target, const1_rtx);
+  tem = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
+  if (GET_CODE (tem) == CONST_INT)
+    return tem;
+
+  label = gen_label_rtx ();
+  if (bcc_gen_fctn[(int) code] == 0)
+    abort ();
+
+  emit_jump_insn ((*bcc_gen_fctn[(int) code]) (label));
+  emit_move_insn (target, const0_rtx);
   emit_label (label);
 
   return target;