(force_to_mode, case PLUS): Sign extend masks that are negative in OP_MODE.

[pf3gnuchains/gcc-fork.git] / gcc / expmed.c
diff --git a/gcc/expmed.c b/gcc/expmed.c

index 46e16be..fc0a0ef 100644 (file)
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -1,6 +1,6 @@
  /* Medium-level subroutines: convert bit-field store and extract
     and shifts, multiplies and divides to rtl instructions.
-   Copyright (C) 1987, 88, 89, 92, 93, 1994 Free Software Foundation, Inc.
+   Copyright (C) 1987, 88, 89, 92, 93, 94, 1995 Free Software Foundation, Inc.
  
  This file is part of GNU CC.
  
@@ -61,11 +61,16 @@ static int sdiv_pow2_cheap, smod_pow2_cheap;
  #define MAX_BITS_PER_WORD BITS_PER_WORD
  #endif
  
-/* Cost of various pieces of RTL.  */
+/* Cost of various pieces of RTL.  Note that some of these are indexed by shift count,
+   and some by mode.  */
  static int add_cost, negate_cost, zero_cost;
  static int shift_cost[MAX_BITS_PER_WORD];
  static int shiftadd_cost[MAX_BITS_PER_WORD];
  static int shiftsub_cost[MAX_BITS_PER_WORD];
+static int mul_cost[NUM_MACHINE_MODES];
+static int div_cost[NUM_MACHINE_MODES];
+static int mul_widen_cost[NUM_MACHINE_MODES];
+static int mul_highpart_cost[NUM_MACHINE_MODES];
  
  void
  init_expmed ()
@@ -77,6 +82,7 @@ init_expmed ()
    rtx shift_insn, shiftadd_insn, shiftsub_insn;
    int dummy;
    int m;
+  enum machine_mode mode, wider_mode;
  
    start_sequence ();
  
@@ -138,6 +144,32 @@ init_expmed ()
      = (rtx_cost (gen_rtx (MOD, word_mode, reg, GEN_INT (32)), SET)
         <= 2 * add_cost);
  
+  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
+       mode != VOIDmode;
+       mode = GET_MODE_WIDER_MODE (mode))
+    {
+      reg = gen_rtx (REG, mode, 10000);
+      div_cost[(int) mode] = rtx_cost (gen_rtx (UDIV, mode, reg, reg), SET);
+      mul_cost[(int) mode] = rtx_cost (gen_rtx (MULT, mode, reg, reg), SET);
+      wider_mode = GET_MODE_WIDER_MODE (mode);
+      if (wider_mode != VOIDmode)
+       {
+         mul_widen_cost[(int) wider_mode]
+           = rtx_cost (gen_rtx (MULT, wider_mode,
+                                gen_rtx (ZERO_EXTEND, wider_mode, reg),
+                                gen_rtx (ZERO_EXTEND, wider_mode, reg)),
+                       SET);
+         mul_highpart_cost[(int) mode]
+           = rtx_cost (gen_rtx (TRUNCATE, mode,
+                                gen_rtx (LSHIFTRT, wider_mode,
+                                         gen_rtx (MULT, wider_mode,
+                                                  gen_rtx (ZERO_EXTEND, wider_mode, reg),
+                                                  gen_rtx (ZERO_EXTEND, wider_mode, reg)),
+                                         GEN_INT (GET_MODE_BITSIZE (mode)))),
+                       SET);
+       }
+    }
+
    /* Free the objects we just allocated.  */
    end_sequence ();
    obfree (free_point);
@@ -221,13 +253,13 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
        op0 = SUBREG_REG (op0);
      }
  
-#if BYTES_BIG_ENDIAN
    /* If OP0 is a register, BITPOS must count within a word.
       But as we have it, it counts within whatever size OP0 now has.
       On a bigendian machine, these are not the same, so convert.  */
-  if (GET_CODE (op0) != MEM && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
+  if (BYTES_BIG_ENDIAN
+      && GET_CODE (op0) != MEM
+      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
      bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
-#endif
  
    value = protect_from_queue (value, 0);
  
@@ -261,11 +293,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
       can be done with a movestrict instruction.  */
  
    if (GET_CODE (op0) != MEM
-#if BYTES_BIG_ENDIAN
-      && bitpos + bitsize == unit
-#else
-      && bitpos == 0
-#endif
+      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
        && bitsize == GET_MODE_BITSIZE (fieldmode)
        && (GET_MODE (op0) == fieldmode
           || (movstrict_optab->handlers[(int) fieldmode].insn_code
@@ -299,7 +327,10 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
        /* Here we transfer the words of the field
          in the order least significant first.
          This is because the most significant word is the one which may
-        be less than full.  */
+        be less than full.
+        However, only do that if the value is not BLKmode.  */
+
+      int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
  
        int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
        int i;
@@ -315,8 +346,8 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
         {
           /* If I is 0, use the low-order word in both field and target;
              if I is 1, use the next to lowest word; and so on.  */
-         int wordnum = (WORDS_BIG_ENDIAN ? nwords - i - 1 : i);
-         int bit_offset = (WORDS_BIG_ENDIAN
+         int wordnum = (backwards ? nwords - i - 1 : i);
+         int bit_offset = (backwards
                             ? MAX (bitsize - (i + 1) * BITS_PER_WORD, 0)
                             : i * BITS_PER_WORD);
           store_bit_field (op0, MIN (BITS_PER_WORD,
@@ -410,7 +441,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
             bestmode = GET_MODE (op0);
  
           if (bestmode == VOIDmode
-             || (STRICT_ALIGNMENT && GET_MODE_SIZE (bestmode) > align))
+             || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
             goto insv_loses;
  
           /* Adjust address to point to the containing unit of that mode.  */
@@ -447,15 +478,14 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
        /* On big-endian machines, we count bits from the most significant.
          If the bit field insn does not, we must invert.  */
  
-#if BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN
-      xbitpos = unit - bitsize - xbitpos;
-#endif
+      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+       xbitpos = unit - bitsize - xbitpos;
+
        /* We have been counting XBITPOS within UNIT.
          Count instead within the size of the register.  */
-#if BITS_BIG_ENDIAN
-      if (GET_CODE (xop0) != MEM)
+      if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
-#endif
+
        unit = GET_MODE_BITSIZE (maxmode);
  
        /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
@@ -603,13 +633,12 @@ store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
       BITPOS is the starting bit number within OP0.
       (OP0's mode may actually be narrower than MODE.)  */
  
-#if BYTES_BIG_ENDIAN
-  /* BITPOS is the distance between our msb
-     and that of the containing datum.
-     Convert it to the distance from the lsb.  */
+  if (BYTES_BIG_ENDIAN)
+      /* BITPOS is the distance between our msb
+        and that of the containing datum.
+        Convert it to the distance from the lsb.  */
+      bitpos = total_bits - bitsize - bitpos;
  
-  bitpos = total_bits - bitsize - bitpos;
-#endif
    /* Now BITPOS is always the distance between our lsb
       and that of OP0.  */
  
@@ -718,7 +747,9 @@ store_split_bit_field (op0, bitsize, bitpos, value, align)
         value = word;
        else
         value = gen_lowpart_common (word_mode,
-                                   force_reg (GET_MODE (value), value));
+                                   force_reg (GET_MODE (value) != VOIDmode
+                                              ? GET_MODE (value)
+                                              : word_mode, value));
      }
  
    while (bitsdone < bitsize)
@@ -737,35 +768,33 @@ store_split_bit_field (op0, bitsize, bitpos, value, align)
        thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
        thissize = MIN (thissize, unit - thispos);
  
-#if BYTES_BIG_ENDIAN
-      /* Fetch successively less significant portions.  */
-      if (GET_CODE (value) == CONST_INT)
-       part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
-                        >> (bitsize - bitsdone - thissize))
-                       & (((HOST_WIDE_INT) 1 << thissize) - 1));
-      else
+      if (BYTES_BIG_ENDIAN)
         {
-         /* The args are chosen so that the last part
-            includes the lsb.  */
-         int bit_offset = 0;
-         /* If the value isn't in memory, then it must be right aligned
-            if a register, so skip past the padding on the left.  If it
-            is in memory, then there is no padding on the left.  */
-         if (GET_CODE (value) != MEM)
-           bit_offset = BITS_PER_WORD - bitsize;
-         part = extract_fixed_bit_field (word_mode, value, 0, thissize,
-                                         bit_offset + bitsdone,
-                                         NULL_RTX, 1, align);
+         /* Fetch successively less significant portions.  */
+         if (GET_CODE (value) == CONST_INT)
+           part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
+                            >> (bitsize - bitsdone - thissize))
+                           & (((HOST_WIDE_INT) 1 << thissize) - 1));
+         else
+           /* The args are chosen so that the last part includes the
+              lsb.  Give extract_fixed_bit_field the value it needs (with
+              endianness compensation) to fetch the piece we want.  */
+           part = extract_fixed_bit_field (word_mode, value, 0, thissize,
+                                           GET_MODE_BITSIZE (GET_MODE (value))
+                                           - bitsize + bitsdone,
+                                           NULL_RTX, 1, align);
         }
-#else
-      /* Fetch successively more significant portions.  */
-      if (GET_CODE (value) == CONST_INT)
-       part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) >> bitsdone)
-                       & (((HOST_WIDE_INT) 1 << thissize) - 1));
        else
-       part = extract_fixed_bit_field (word_mode, value, 0, thissize,
-                                       bitsdone, NULL_RTX, 1, align);
-#endif
+       {
+         /* Fetch successively more significant portions.  */
+         if (GET_CODE (value) == CONST_INT)
+           part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
+                            >> bitsdone)
+                           & (((HOST_WIDE_INT) 1 << thissize) - 1));
+         else
+           part = extract_fixed_bit_field (word_mode, value, 0, thissize,
+                                           bitsdone, NULL_RTX, 1, align);
+       }
  
        /* If OP0 is a register, then handle OFFSET here.
  
@@ -849,17 +878,35 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
      tmode = mode;
    while (GET_CODE (op0) == SUBREG)
      {
+      int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
+      int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));
+
        offset += SUBREG_WORD (op0);
+
+      if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
+       {
+         bitpos += inner_size - outer_size;
+         if (bitpos > unit)
+           {
+             offset += (bitpos / unit);
+             bitpos %= unit;
+           }
+       }
+
        op0 = SUBREG_REG (op0);
      }
+
+  /* ??? We currently assume TARGET is at least as big as BITSIZE.
+     If that's wrong, the solution is to test for it and set TARGET to 0
+     if needed.  */
    
-#if BYTES_BIG_ENDIAN
    /* If OP0 is a register, BITPOS must count within a word.
       But as we have it, it counts within whatever size OP0 now has.
       On a bigendian machine, these are not the same, so convert.  */
-  if (GET_CODE (op0) != MEM && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
+  if (BYTES_BIG_ENDIAN &&
+      GET_CODE (op0) != MEM
+      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
      bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
-#endif
  
    /* Extracting a full-word or multi-word value
       from a structure in a register or aligned memory.
@@ -875,12 +922,9 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
        && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
            && bitpos % BITS_PER_WORD == 0)
           || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
-#if BYTES_BIG_ENDIAN
-             && bitpos + bitsize == BITS_PER_WORD
-#else
-             && bitpos == 0
-#endif
-             )))
+             && (BYTES_BIG_ENDIAN
+                 ? bitpos + bitsize == BITS_PER_WORD
+                 : bitpos == 0))))
      {
        enum machine_mode mode1
         = mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0);
@@ -917,7 +961,11 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
         {
           /* If I is 0, use the low-order word in both field and target;
              if I is 1, use the next to lowest word; and so on.  */
-         int wordnum = (WORDS_BIG_ENDIAN ? nwords - i - 1 : i);
+         /* Word number in TARGET to use.  */
+         int wordnum = (WORDS_BIG_ENDIAN
+                        ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
+                        : i);
+         /* Offset from start of field in OP0.  */
           int bit_offset = (WORDS_BIG_ENDIAN
                             ? MAX (0, bitsize - (i + 1) * BITS_PER_WORD)
                             : i * BITS_PER_WORD);
@@ -937,7 +985,24 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
         }
  
        if (unsignedp)
-       return target;
+       {
+         /* Unless we've filled TARGET, the upper regs in a multi-reg value
+            need to be zeroed out.  */
+         if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
+           {
+             int i,total_words;
+
+             total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
+             for (i = nwords; i < total_words; i++)
+               {
+                 int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
+                 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
+                 emit_move_insn (target_part, const0_rtx);
+               }
+           }
+         return target;
+       }
+
        /* Signed bit field: sign-extend with two arithmetic shifts.  */
        target = expand_shift (LSHIFT_EXPR, mode, target,
                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
@@ -1020,7 +1085,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
                     bestmode = GET_MODE (xop0);
  
                   if (bestmode == VOIDmode
-                     || (STRICT_ALIGNMENT && GET_MODE_SIZE (bestmode) > align))
+                     || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
                     goto extzv_loses;
  
                   /* Compute offset as multiple of this unit,
@@ -1053,14 +1118,13 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
  
           /* On big-endian machines, we count bits from the most significant.
              If the bit field insn does not, we must invert.  */
-#if BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN
-         xbitpos = unit - bitsize - xbitpos;
-#endif
+         if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+           xbitpos = unit - bitsize - xbitpos;
+
           /* Now convert from counting within UNIT to counting in MAXMODE.  */
-#if BITS_BIG_ENDIAN
-         if (GET_CODE (xop0) != MEM)
+         if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
-#endif
+
           unit = GET_MODE_BITSIZE (maxmode);
  
           if (xtarget == 0
@@ -1157,7 +1221,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
                     bestmode = GET_MODE (xop0);
  
                   if (bestmode == VOIDmode
-                     || (STRICT_ALIGNMENT && GET_MODE_SIZE (bestmode) > align))
+                     || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
                     goto extv_loses;
  
                   /* Compute offset as multiple of this unit,
@@ -1188,15 +1252,14 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
  
           /* On big-endian machines, we count bits from the most significant.
              If the bit field insn does not, we must invert.  */
-#if BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN
-         xbitpos = unit - bitsize - xbitpos;
-#endif
+         if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+           xbitpos = unit - bitsize - xbitpos;
+
           /* XBITPOS counts within a size of UNIT.
              Adjust to count within a size of MAXMODE.  */
-#if BITS_BIG_ENDIAN
-         if (GET_CODE (xop0) != MEM)
+         if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
-#endif
+
           unit = GET_MODE_BITSIZE (maxmode);
  
           if (xtarget == 0
@@ -1350,12 +1413,14 @@ extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
  
    mode = GET_MODE (op0);
  
-#if BYTES_BIG_ENDIAN
-  /* BITPOS is the distance between our msb and that of OP0.
-     Convert it to the distance from the lsb.  */
+  if (BYTES_BIG_ENDIAN)
+    {
+      /* BITPOS is the distance between our msb and that of OP0.
+        Convert it to the distance from the lsb.  */
+
+      bitpos = total_bits - bitsize - bitpos;
+    }
  
-  bitpos = total_bits - bitsize - bitpos;
-#endif
    /* Now BITPOS is always the distance between the field's lsb and that of OP0.
       We have reduced the big-endian case to the little-endian case.  */
  
@@ -1432,7 +1497,8 @@ extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
  /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
     of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
     complement of that if COMPLEMENT.  The mask is truncated if
-   necessary to the width of mode MODE.  */
+   necessary to the width of mode MODE.  The mask is zero-extended if
+   BITSIZE+BITPOS is too small for MODE.  */
  
  static rtx
  mask_rtx (mode, bitpos, bitsize, complement)
@@ -1573,15 +1639,18 @@ extract_split_bit_field (op0, bitsize, bitpos, unsignedp, align)
        bitsdone += thissize;
  
        /* Shift this part into place for the result.  */
-#if BYTES_BIG_ENDIAN
-      if (bitsize != bitsdone)
-       part = expand_shift (LSHIFT_EXPR, word_mode, part,
-                            build_int_2 (bitsize - bitsdone, 0), 0, 1);
-#else
-      if (bitsdone != thissize)
-       part = expand_shift (LSHIFT_EXPR, word_mode, part,
-                            build_int_2 (bitsdone - thissize, 0), 0, 1);
-#endif
+      if (BYTES_BIG_ENDIAN)
+       {
+         if (bitsize != bitsdone)
+           part = expand_shift (LSHIFT_EXPR, word_mode, part,
+                                build_int_2 (bitsize - bitsdone, 0), 0, 1);
+       }
+      else
+       {
+         if (bitsdone != thissize)
+           part = expand_shift (LSHIFT_EXPR, word_mode, part,
+                                build_int_2 (bitsdone - thissize, 0), 0, 1);
+       }
  
        if (first)
         result = part;
@@ -1658,7 +1727,7 @@ expand_shift (code, mode, shifted, amount, target, unsignedp)
  
    op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
  
-#if 0 && SHIFT_COUNT_TRUNCATED
+#if SHIFT_COUNT_TRUNCATED
    if (SHIFT_COUNT_TRUNCATED
        && GET_CODE (op1) == CONST_INT
        && (unsigned HOST_WIDE_INT) INTVAL (op1) >= GET_MODE_BITSIZE (mode))
@@ -1687,8 +1756,7 @@ expand_shift (code, mode, shifted, amount, target, unsignedp)
             continue;
           else if (methods == OPTAB_LIB_WIDEN)
             {
-             /* If we are rotating by a constant that is valid and
-                we have been unable to open-code this by a rotation,
+             /* If we have been unable to open-code this by a rotation,
                  do it as the IOR of two shifts.  I.e., to rotate A
                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
                  where C is the bitsize of A.
@@ -1700,25 +1768,25 @@ expand_shift (code, mode, shifted, amount, target, unsignedp)
                  this extremely unlikely lossage to avoid complicating the
                  code below.  */
  
-             if (GET_CODE (op1) == CONST_INT && INTVAL (op1) > 0
-                 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
-               {
-                 rtx subtarget = target == shifted ? 0 : target;
-                 rtx temp1;
-                 tree other_amount
-                   = build_int_2 (GET_MODE_BITSIZE (mode) - INTVAL (op1), 0);
-
-                 shifted = force_reg (mode, shifted);
-
-                 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
-                                      mode, shifted, amount, subtarget, 1);
-                 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
-                                       mode, shifted, other_amount, 0, 1);
-                 return expand_binop (mode, ior_optab, temp, temp1, target,
-                                      unsignedp, methods);
-               }
-             else
-               methods = OPTAB_LIB;
+             rtx subtarget = target == shifted ? 0 : target;
+             rtx temp1;
+             tree type = TREE_TYPE (amount);
+             tree new_amount = make_tree (type, op1);
+             tree other_amount
+               = fold (build (MINUS_EXPR, type,
+                              convert (type,
+                                       build_int_2 (GET_MODE_BITSIZE (mode),
+                                                    0)),
+                              amount));
+
+             shifted = force_reg (mode, shifted);
+
+             temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
+                                  mode, shifted, new_amount, subtarget, 1);
+             temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
+                                   mode, shifted, other_amount, 0, 1);
+             return expand_binop (mode, ior_optab, temp, temp1, target,
+                                  unsignedp, methods);
             }
  
           temp = expand_binop (mode,
@@ -2062,23 +2130,31 @@ expand_mult (mode, op0, op1, target, unsignedp)
  {
    rtx const_op1 = op1;
  
+  /* synth_mult does an `unsigned int' multiply.  As long as the mode is
+     less than or equal in size to `unsigned int' this doesn't matter.
+     If the mode is larger than `unsigned int', then synth_mult works only
+     if the constant value exactly fits in an `unsigned int' without any
+     truncation.  This means that multiplying by negative values does
+     not work; results are off by 2^32 on a 32 bit machine.  */
+
    /* If we are multiplying in DImode, it may still be a win
       to try to work with shifts and adds.  */
    if (GET_CODE (op1) == CONST_DOUBLE
        && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
-      && HOST_BITS_PER_INT <= BITS_PER_WORD)
-    {
-      if ((CONST_DOUBLE_HIGH (op1) == 0 && CONST_DOUBLE_LOW (op1) >= 0)
-         || (CONST_DOUBLE_HIGH (op1) == -1 && CONST_DOUBLE_LOW (op1) < 0))
-       const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
-    }
+      && HOST_BITS_PER_INT >= BITS_PER_WORD
+      && CONST_DOUBLE_HIGH (op1) == 0)
+    const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
+  else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
+          && GET_CODE (op1) == CONST_INT
+          && INTVAL (op1) < 0)
+    const_op1 = 0;
  
    /* We used to test optimize here, on the grounds that it's better to
       produce a smaller program when -O is not used.
       But this causes such a terrible slowdown sometimes
       that it seems better to use synth_mult always.  */
  
-  if (GET_CODE (const_op1) == CONST_INT)
+  if (const_op1 && GET_CODE (const_op1) == CONST_INT)
      {
        struct algorithm alg;
        struct algorithm alg2;
@@ -2096,10 +2172,16 @@ expand_mult (mode, op0, op1, target, unsignedp)
        mult_cost = MIN (12 * add_cost, mult_cost);
  
        synth_mult (&alg, val, mult_cost);
-      synth_mult (&alg2, - val,
-                 (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
-      if (alg2.cost + negate_cost < alg.cost)
-       alg = alg2, variant = negate_variant;
+
+      /* This works only if the negated value actually fits in an
+        `unsigned int'.  */
+      if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
+       {
+         synth_mult (&alg2, - val,
+                     (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
+         if (alg2.cost + negate_cost < alg.cost)
+           alg = alg2, variant = negate_variant;
+       }
  
        /* This proves very useful for division-by-constant.  */
        synth_mult (&alg2, val - 1,
@@ -2436,14 +2518,17 @@ expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
  
     MODE is the mode of operation and result.
  
-   UNSIGNEDP nonzero means unsigned multiply.  */
+   UNSIGNEDP nonzero means unsigned multiply.
+
+   MAX_COST is the total allowed cost for the expanded RTL.  */
  
  rtx
-expand_mult_highpart (mode, op0, cnst1, target, unsignedp)
+expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
       enum machine_mode mode;
       register rtx op0, target;
       unsigned HOST_WIDE_INT cnst1;
       int unsignedp;
+     int max_cost;
  {
    enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
    optab mul_highpart_optab;
@@ -2470,7 +2555,8 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp)
  
    /* expand_mult handles constant multiplication of word_mode
       or narrower.  It does a poor job for large modes.  */
-  if (size < BITS_PER_WORD)
+  if (size < BITS_PER_WORD
+      && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
      {
        /* We have to do this, since expand_binop doesn't do conversion for
          multiply.  Maybe change expand_binop to handle widening multiply?  */
@@ -2479,7 +2565,7 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp)
        tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, unsignedp);
        tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
                           build_int_2 (size, 0), NULL_RTX, 1);
-      return gen_lowpart (mode, tem);
+      return convert_modes (mode, wider_mode, tem, unsignedp);
      }
  
    if (target == 0)
@@ -2487,55 +2573,63 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp)
  
    /* Firstly, try using a multiplication insn that only generates the needed
       high part of the product, and in the sign flavor of unsignedp.  */
-  mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
-  target = expand_binop (mode, mul_highpart_optab,
-                        op0, op1, target, unsignedp, OPTAB_DIRECT);
-  if (target)
-    return target;
+  if (mul_highpart_cost[(int) mode] < max_cost)
+    {
+      mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
+      target = expand_binop (mode, mul_highpart_optab,
+                            op0, op1, target, unsignedp, OPTAB_DIRECT);
+      if (target)
+       return target;
+    }
  
    /* Secondly, same as above, but use sign flavor opposite of unsignedp.
       Need to adjust the result after the multiplication.  */
-  mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
-  target = expand_binop (mode, mul_highpart_optab,
-                        op0, op1, target, unsignedp, OPTAB_DIRECT);
-  if (target)
-    /* We used the wrong signedness.  Adjust the result.  */
-    return expand_mult_highpart_adjust (mode, target, op0,
-                                       op1, target, unsignedp);
-
-  /* Thirdly, we try to use a widening multiplication, or a wider mode
-     multiplication.  */
+  if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)
+    {
+      mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
+      target = expand_binop (mode, mul_highpart_optab,
+                            op0, op1, target, unsignedp, OPTAB_DIRECT);
+      if (target)
+       /* We used the wrong signedness.  Adjust the result.  */
+       return expand_mult_highpart_adjust (mode, target, op0,
+                                           op1, target, unsignedp);
+    }
  
+  /* Try widening multiplication.  */
    moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
-  if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing)
-    ;
-  else if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing)
-    moptab = smul_optab;
-  else
+  if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+      && mul_widen_cost[(int) wider_mode] < max_cost)
+    goto try;
+
+  /* Try widening the mode and perform a non-widening multiplication.  */
+  moptab = smul_optab;
+  if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+      && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
+    goto try;
+
+  /* Try widening multiplication of opposite signedness, and adjust.  */
+  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
+  if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+      && (mul_widen_cost[(int) wider_mode]
+         + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
      {
-      /* Try widening multiplication of opposite signedness, and adjust.  */
-      moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
-      if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing)
+      tem = expand_binop (wider_mode, moptab, op0, wide_op1,
+                         NULL_RTX, ! unsignedp, OPTAB_WIDEN);
+      if (tem != 0)
         {
-         tem = expand_binop (wider_mode, moptab, op0, wide_op1,
-                             NULL_RTX, ! unsignedp, OPTAB_WIDEN);
-         if (tem != 0)
-           {
-             /* Extract the high half of the just generated product.  */
-             tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
-                                 build_int_2 (size, 0), NULL_RTX, 1);
-             tem = gen_lowpart (mode, tem);
-             /* We used the wrong signedness.  Adjust the result.  */
-             return expand_mult_highpart_adjust (mode, tem, op0, op1,
-                                                 target, unsignedp);
-           }
+         /* Extract the high half of the just generated product.  */
+         tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
+                             build_int_2 (size, 0), NULL_RTX, 1);
+         tem = convert_modes (mode, wider_mode, tem, unsignedp);
+         /* We used the wrong signedness.  Adjust the result.  */
+         return expand_mult_highpart_adjust (mode, tem, op0, op1,
+                                             target, unsignedp);
         }
-
-      /* As a last resort, try widening the mode and perform a
-        non-widening multiplication.  */
-      moptab = smul_optab;
      }
  
+  return 0;
+
+ try:
    /* Pass NULL_RTX as target since TARGET has wrong mode.  */
    tem = expand_binop (wider_mode, moptab, op0, wide_op1,
                       NULL_RTX, unsignedp, OPTAB_WIDEN);
@@ -2545,7 +2639,7 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp)
    /* Extract the high half of the just generated product.  */
    tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
                       build_int_2 (size, 0), NULL_RTX, 1);
-  return gen_lowpart (mode, tem);
+  return convert_modes (mode, wider_mode, tem, unsignedp);
  }
  \f
  /* Emit the code to divide OP0 by OP1, putting the result in TARGET
@@ -2583,6 +2677,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
    rtx insn, set;
    optab optab1, optab2;
    int op1_is_constant, op1_is_pow2;
+  int max_cost, extra_cost;
  
    op1_is_constant = GET_CODE (op1) == CONST_INT;
    op1_is_pow2 = (op1_is_constant
@@ -2597,13 +2692,13 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
  
       Second comes a switch statement with code specific for each rounding mode.
       For some special operands this code emits all RTL for the desired
-     operation, for other cases, it generates a quotient and stores it in
+     operation; for other cases, it generates only a quotient and stores it in
       QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
       to indicate that it has not done anything.
  
-     Last comes code that finishes the operation.  If QUOTIENT is set an
-     REM_FLAG, the remainder is computed as OP0 - QUOTIENT * OP1.  If QUOTIENT
-     is not set, it is computed using trunc rounding.
+     Last comes code that finishes the operation.  If QUOTIENT is set and
+     REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
+     QUOTIENT is not set, it is computed using trunc rounding.
  
       We try to generate special code for division and remainder when OP1 is a
       constant.  If |OP1| = 2**n we can use shifts and some other fast
@@ -2687,11 +2782,14 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
    size = GET_MODE_BITSIZE (compute_mode);
  #if 0
    /* It should be possible to restrict the precision to GET_MODE_BITSIZE
-     (mode), and thereby get better code when OP1 is a constant.  Do that for
-     GCC 2.7.  It will require going over all usages of SIZE below.  */
+     (mode), and thereby get better code when OP1 is a constant.  Do that
+     later.  It will require going over all usages of SIZE below.  */
    size = GET_MODE_BITSIZE (mode);
  #endif
  
+  max_cost = div_cost[(int) compute_mode]
+    - (rem_flag ? mul_cost[(int) compute_mode] + add_cost : 0);
+
    /* Now convert to the best mode to use.  */
    if (compute_mode != mode)
      {
@@ -2729,7 +2827,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
        case TRUNC_DIV_EXPR:
         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
           {
-           if (unsignedp)
+           if (unsignedp
+               || (INTVAL (op1)
+                   == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (compute_mode) - 1)))
               {
                 unsigned HOST_WIDE_INT mh, ml;
                 int pre_shift, post_shift;
@@ -2790,8 +2890,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
                       {
                         rtx t1, t2, t3, t4;
  
+                       extra_cost = (shift_cost[post_shift - 1]
+                                     + shift_cost[1] + 2 * add_cost);
                         t1 = expand_mult_highpart (compute_mode, op0, ml,
-                                                  NULL_RTX, 1);
+                                                  NULL_RTX, 1,
+                                                  max_cost - extra_cost);
                         if (t1 == 0)
                           goto fail1;
                         t2 = force_operand (gen_rtx (MINUS, compute_mode,
@@ -2814,8 +2917,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
                                            build_int_2 (pre_shift, 0),
                                            NULL_RTX, 1);
+                       extra_cost = (shift_cost[pre_shift]
+                                     + shift_cost[post_shift]);
                         t2 = expand_mult_highpart (compute_mode, t1, ml,
-                                                  NULL_RTX, 1);
+                                                  NULL_RTX, 1,
+                                                  max_cost - extra_cost);
                         if (t2 == 0)
                           goto fail1;
                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t2,
@@ -2890,6 +2996,8 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
                                                  tquotient, 0);
                       }
  
+                   /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
+                      the quotient.  */
                     if (d < 0)
                       {
                         insn = get_last_insn ();
@@ -2914,8 +3022,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
                       {
                         rtx t1, t2, t3;
  
+                       extra_cost = (shift_cost[post_shift]
+                                     + shift_cost[size - 1] + add_cost);
                         t1 = expand_mult_highpart (compute_mode, op0, ml,
-                                                  NULL_RTX, 0);
+                                                  NULL_RTX, 0,
+                                                  max_cost - extra_cost);
                         if (t1 == 0)
                           goto fail1;
                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
@@ -2934,8 +3045,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
                         rtx t1, t2, t3, t4;
  
                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
+                       extra_cost = (shift_cost[post_shift]
+                                     + shift_cost[size - 1] + 2 * add_cost);
                         t1 = expand_mult_highpart (compute_mode, op0, ml,
-                                                  NULL_RTX, 0);
+                                                  NULL_RTX, 0,
+                                                  max_cost - extra_cost);
                         if (t1 == 0)
                           goto fail1;
                         t2 = force_operand (gen_rtx (PLUS, compute_mode, t1, op0),
@@ -3009,8 +3123,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
                                        build_int_2 (size - 1, 0), NULL_RTX, 0);
                     t2 = expand_binop (compute_mode, xor_optab, op0, t1,
                                        NULL_RTX, 0, OPTAB_WIDEN);
+                   extra_cost = (shift_cost[post_shift]
+                                 + shift_cost[size - 1] + 2 * add_cost);
                     t3 = expand_mult_highpart (compute_mode, t2, ml,
-                                              NULL_RTX, 1);
+                                              NULL_RTX, 1,
+                                              max_cost - extra_cost);
                     if (t3 != 0)
                       {
                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
@@ -3058,14 +3175,17 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
            and return the result right away.  */
         if (target == 0)
           target = gen_reg_rtx (compute_mode);
+
         if (rem_flag)
           {
-           remainder = target;
+           remainder
+             = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
             quotient = gen_reg_rtx (compute_mode);
           }
         else
           {
-           quotient = target;
+           quotient
+             = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
             remainder = gen_reg_rtx (compute_mode);
           }
  
@@ -3182,14 +3302,17 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
                value here, and return the result right away.  */
             if (target == 0)
               target = gen_reg_rtx (compute_mode);
+
             if (rem_flag)
               {
-               remainder = target;
+               remainder = (GET_CODE (target) == REG
+                            ? target : gen_reg_rtx (compute_mode));
                 quotient = gen_reg_rtx (compute_mode);
               }
             else
               {
-               quotient = target;
+               quotient = (GET_CODE (target) == REG
+                           ? target : gen_reg_rtx (compute_mode));
                 remainder = gen_reg_rtx (compute_mode);
               }
  
@@ -3236,6 +3359,44 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
           }
         else /* signed */
           {
+           if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
+               && INTVAL (op1) >= 0)
+             {
+               /* This is extremely similar to the code for the unsigned case
+                  above.  For 2.7 we should merge these variants, but for
+                  2.6.1 I don't want to touch the code for unsigned since that
+                  gets used in C.  The signed case will only be used by other
+                  languages (Ada).  */
+
+               rtx t1, t2, t3;
+               unsigned HOST_WIDE_INT d = INTVAL (op1);
+               t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
+                                  build_int_2 (floor_log2 (d), 0),
+                                  tquotient, 0);
+               t2 = expand_binop (compute_mode, and_optab, op0,
+                                  GEN_INT (d - 1),
+                                  NULL_RTX, 1, OPTAB_LIB_WIDEN);
+               t3 = gen_reg_rtx (compute_mode);
+               t3 = emit_store_flag (t3, NE, t2, const0_rtx,
+                                     compute_mode, 1, 1);
+               if (t3 == 0)
+                 {
+                   rtx lab;
+                   lab = gen_label_rtx ();
+                   emit_cmp_insn (t2, const0_rtx, EQ, NULL_RTX,
+                                  compute_mode, 0, 0);
+                   emit_jump_insn (gen_beq (lab));
+                   expand_inc (t1, const1_rtx);
+                   emit_label (lab);
+                   quotient = t1;
+                 }
+               else
+                 quotient = force_operand (gen_rtx (PLUS, compute_mode,
+                                                    t1, t3),
+                                           tquotient);
+               break;
+             }
+
             /* Try using an instruction that produces both the quotient and
                remainder, using truncation.  We can easily compensate the
                quotient or remainder to get ceiling rounding, once we have the
@@ -3245,12 +3406,14 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
               target = gen_reg_rtx (compute_mode);
             if (rem_flag)
               {
-               remainder = target;
+               remainder= (GET_CODE (target) == REG
+                           ? target : gen_reg_rtx (compute_mode));
                 quotient = gen_reg_rtx (compute_mode);
               }
             else
               {
-               quotient = target;
+               quotient = (GET_CODE (target) == REG
+                           ? target : gen_reg_rtx (compute_mode));
                 remainder = gen_reg_rtx (compute_mode);
               }
  
@@ -3355,10 +3518,70 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
  
        case ROUND_DIV_EXPR:
        case ROUND_MOD_EXPR:
-       /* The code that used to be here was wrong, and nothing really
-          depends on it.  */
-       abort ();
-       break;
+       if (unsignedp)
+         {
+           rtx tem;
+           rtx label;
+           label = gen_label_rtx ();
+           quotient = gen_reg_rtx (compute_mode);
+           remainder = gen_reg_rtx (compute_mode);
+           if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
+             {
+               rtx tem;
+               quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
+                                        quotient, 1, OPTAB_LIB_WIDEN);
+               tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
+               remainder = expand_binop (compute_mode, sub_optab, op0, tem,
+                                         remainder, 1, OPTAB_LIB_WIDEN);
+             }
+           tem = plus_constant (op1, -1);
+           tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
+                               build_int_2 (1, 0), NULL_RTX, 1);
+           emit_cmp_insn (remainder, tem, LEU, NULL_RTX, compute_mode, 0, 0);
+           emit_jump_insn (gen_bleu (label));
+           expand_inc (quotient, const1_rtx);
+           expand_dec (remainder, op1);
+           emit_label (label);
+         }
+       else
+         {
+           rtx abs_rem, abs_op1, tem, mask;
+           rtx label;
+           label = gen_label_rtx ();
+           quotient = gen_reg_rtx (compute_mode);
+           remainder = gen_reg_rtx (compute_mode);
+           if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
+             {
+               rtx tem;
+               quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
+                                        quotient, 0, OPTAB_LIB_WIDEN);
+               tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
+               remainder = expand_binop (compute_mode, sub_optab, op0, tem,
+                                         remainder, 0, OPTAB_LIB_WIDEN);
+             }
+           abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 0, 0);
+           abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 0, 0);
+           tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
+                               build_int_2 (1, 0), NULL_RTX, 1);
+           emit_cmp_insn (tem, abs_op1, LTU, NULL_RTX, compute_mode, 0, 0);
+           emit_jump_insn (gen_bltu (label));
+           tem = expand_binop (compute_mode, xor_optab, op0, op1,
+                               NULL_RTX, 0, OPTAB_WIDEN);
+           mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
+                               build_int_2 (size - 1, 0), NULL_RTX, 0);
+           tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
+                               NULL_RTX, 0, OPTAB_WIDEN);
+           tem = expand_binop (compute_mode, sub_optab, tem, mask,
+                               NULL_RTX, 0, OPTAB_WIDEN);
+           expand_inc (quotient, tem);
+           tem = expand_binop (compute_mode, xor_optab, mask, op1,
+                               NULL_RTX, 0, OPTAB_WIDEN);
+           tem = expand_binop (compute_mode, sub_optab, tem, mask,
+                               NULL_RTX, 0, OPTAB_WIDEN);
+           expand_dec (remainder, tem);
+           emit_label (label);
+         }
+       return gen_lowpart (mode, rem_flag ? remainder : quotient);
        }
  
    if (quotient == 0)
@@ -3617,9 +3840,6 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
    rtx last = 0;
    rtx pattern, comparison;
  
-  if (mode == VOIDmode)
-    mode = GET_MODE (op0);
-
    /* If one operand is constant, make it the second one.  Only do this
       if the other operand is not constant as well.  */
  
@@ -3632,6 +3852,9 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
        code = swap_condition (code);
      }
  
+  if (mode == VOIDmode)
+    mode = GET_MODE (op0);
+
    /* For some comparisons with 1 and -1, we can convert this to 
       comparisons with zero.  This will often produce more opportunities for
       store-flag insns. */