/* Medium-level subroutines: convert bit-field store and extract
and shifts, multiplies and divides to rtl instructions.
- Copyright (C) 1987, 1988, 1989, 1992 Free Software Foundation, Inc.
+ Copyright (C) 1987, 1988, 1989, 1992, 1993 Free Software Foundation, Inc.
This file is part of GNU CC.
static int sdiv_pow2_cheap, smod_pow2_cheap;
-/* Cost of various pieces of RTL. */
-static int add_cost, shift_cost, mult_cost, negate_cost, lea_cost;
+/* For compilers that support multiple targets with different word sizes,
+ MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
+ is the H8/300(H) compiler. */
-/* Max scale factor for scaled address in lea instruction. */
-static int lea_max_mul;
+#ifndef MAX_BITS_PER_WORD
+#define MAX_BITS_PER_WORD BITS_PER_WORD
+#endif
+
+/* Cost of various pieces of RTL. */
+static int add_cost, mult_cost, negate_cost, zero_cost;
+static int shift_cost[MAX_BITS_PER_WORD];
+static int shiftadd_cost[MAX_BITS_PER_WORD];
+static int shiftsub_cost[MAX_BITS_PER_WORD];
void
init_expmed ()
{
- char *free_point = (char *) oballoc (1);
+ char *free_point;
/* This is "some random pseudo register" for purposes of calling recog
to see what insns exist. */
rtx reg = gen_rtx (REG, word_mode, FIRST_PSEUDO_REGISTER);
- rtx pow2 = gen_rtx (CONST_INT, VOIDmode, 32);
- rtx lea;
- int i, dummy;
-
- add_cost = rtx_cost (gen_rtx (PLUS, word_mode, reg, reg));
- shift_cost = rtx_cost (gen_rtx (LSHIFT, word_mode, reg,
- /* Using a constant gives better
- estimate of typical costs.
- 1 or 2 might have quirks. */
- gen_rtx (CONST_INT, VOIDmode, 3)));
- mult_cost = rtx_cost (gen_rtx (MULT, word_mode, reg, reg));
- negate_cost = rtx_cost (gen_rtx (NEG, word_mode, reg));
+ rtx shift_insn, shiftadd_insn, shiftsub_insn;
+ int dummy;
+ int m;
- /* 999999 is chosen to avoid any plausible faster special case. */
- mult_is_very_cheap
- = (rtx_cost (gen_rtx (MULT, word_mode, reg,
- gen_rtx (CONST_INT, VOIDmode, 999999)))
- < rtx_cost (gen_rtx (LSHIFT, word_mode, reg,
- gen_rtx (CONST_INT, VOIDmode, 7))));
+ start_sequence ();
- sdiv_pow2_cheap
- = rtx_cost (gen_rtx (DIV, word_mode, reg, pow2)) <= 2 * add_cost;
- smod_pow2_cheap
- = rtx_cost (gen_rtx (MOD, word_mode, reg, pow2)) <= 2 * add_cost;
+ /* Since we are on the permanent obstack, we must be sure we save this
+ spot AFTER we call start_sequence, since it will reuse the rtl it
+ makes. */
+
+ free_point = (char *) oballoc (0);
+
+ zero_cost = rtx_cost (const0_rtx, 0);
+ add_cost = rtx_cost (gen_rtx (PLUS, word_mode, reg, reg), SET);
+
+ shift_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
+ gen_rtx (ASHIFT, word_mode, reg,
+ const0_rtx)));
+
+ shiftadd_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
+ gen_rtx (PLUS, word_mode,
+ gen_rtx (MULT, word_mode,
+ reg, const0_rtx),
+ reg)));
+
+ shiftsub_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
+ gen_rtx (MINUS, word_mode,
+ gen_rtx (MULT, word_mode,
+ reg, const0_rtx),
+ reg)));
init_recog ();
- for (i = 2;; i <<= 1)
+
+ shift_cost[0] = 0;
+ shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
+
+ for (m = 1; m < BITS_PER_WORD; m++)
{
- lea = gen_rtx (SET, VOIDmode, reg,
- gen_rtx (PLUS, word_mode,
- gen_rtx (MULT, word_mode, reg,
- gen_rtx (CONST_INT, VOIDmode, i)),
- reg));
- /* Using 0 as second argument is not quite right,
- but what else is there to do? */
- if (recog (lea, 0, &dummy) < 0)
- break;
- lea_max_mul = i;
- lea_cost = rtx_cost (SET_SRC (lea));
+ shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
+
+ XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
+ if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
+ shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
+
+ XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
+ = GEN_INT ((HOST_WIDE_INT) 1 << m);
+ if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
+ shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
+
+ XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
+ = GEN_INT ((HOST_WIDE_INT) 1 << m);
+ if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
+ shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
}
+ mult_cost = rtx_cost (gen_rtx (MULT, word_mode, reg, reg), SET);
+ /* For gcc 2.4 keep MULT_COST small to avoid really slow searches
+ in synth_mult. */
+ mult_cost = MIN (12 * add_cost, mult_cost);
+ negate_cost = rtx_cost (gen_rtx (NEG, word_mode, reg), SET);
+
+ /* 999999 is chosen to avoid any plausible faster special case. */
+ mult_is_very_cheap
+ = (rtx_cost (gen_rtx (MULT, word_mode, reg, GEN_INT (999999)), SET)
+ < rtx_cost (gen_rtx (ASHIFT, word_mode, reg, GEN_INT (7)), SET));
+
+ sdiv_pow2_cheap
+ = (rtx_cost (gen_rtx (DIV, word_mode, reg, GEN_INT (32)), SET)
+ <= 2 * add_cost);
+ smod_pow2_cheap
+ = (rtx_cost (gen_rtx (MOD, word_mode, reg, GEN_INT (32)), SET)
+ <= 2 * add_cost);
+
/* Free the objects we just allocated. */
+ end_sequence ();
obfree (free_point);
}
{
if (GET_CODE (x) == CONST_INT)
{
- int val = - INTVAL (x);
- if (GET_MODE_BITSIZE (mode) < HOST_BITS_PER_INT)
+ HOST_WIDE_INT val = - INTVAL (x);
+ if (GET_MODE_BITSIZE (mode) < HOST_BITS_PER_WIDE_INT)
{
/* Sign extend the value from the bits that are significant. */
- if (val & (1 << (GET_MODE_BITSIZE (mode) - 1)))
- val |= (-1) << GET_MODE_BITSIZE (mode);
+ if (val & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
+ val |= (HOST_WIDE_INT) (-1) << GET_MODE_BITSIZE (mode);
else
- val &= (1 << GET_MODE_BITSIZE (mode)) - 1;
+ val &= ((HOST_WIDE_INT) 1 << GET_MODE_BITSIZE (mode)) - 1;
}
- return gen_rtx (CONST_INT, VOIDmode, val);
+ return GEN_INT (val);
}
else
- return expand_unop (GET_MODE (x), neg_optab, x, 0, 0);
+ return expand_unop (GET_MODE (x), neg_optab, x, NULL_RTX, 0);
}
\f
/* Generate code to store value from rtx VALUE
/* Note that the adjustment of BITPOS above has no effect on whether
BITPOS is 0 in a REG bigger than a word. */
- if (GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD && GET_CODE (op0) != MEM
+ if (GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
+ && (! STRICT_ALIGNMENT || GET_CODE (op0) != MEM)
&& bitpos == 0 && bitsize == GET_MODE_BITSIZE (fieldmode))
{
/* Storing in a full-word or multi-word field in a register
can be done with just SUBREG. */
if (GET_MODE (op0) != fieldmode)
- op0 = gen_rtx (SUBREG, fieldmode, op0, offset);
+ if (GET_CODE (op0) == REG)
+ op0 = gen_rtx (SUBREG, fieldmode, op0, offset);
+ else
+ op0 = change_address (op0, fieldmode,
+ plus_constant (XEXP (op0, 0), offset));
emit_move_insn (op0, value);
return value;
}
if (GET_MODE (op0) == BLKmode
|| GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
bestmode
- = get_best_mode (bitsize, bitnum,
- align * BITS_PER_UNIT, maxmode,
- GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
+ = get_best_mode (bitsize, bitnum, align * BITS_PER_UNIT, maxmode,
+ MEM_VOLATILE_P (op0));
else
bestmode = GET_MODE (op0);
if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
{
/* Optimization: Don't bother really extending VALUE
- if it has all the bits we will actually use. */
+ if it has all the bits we will actually use. However,
+ if we must narrow it, be sure we do it correctly. */
- /* Avoid making subreg of a subreg, or of a mem. */
- if (GET_CODE (value1) != REG)
+ if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
+ {
+ /* Avoid making subreg of a subreg, or of a mem. */
+ if (GET_CODE (value1) != REG)
value1 = copy_to_reg (value1);
- value1 = gen_rtx (SUBREG, maxmode, value1, 0);
+ value1 = gen_rtx (SUBREG, maxmode, value1, 0);
+ }
+ else
+ value1 = gen_lowpart (maxmode, value1);
}
else if (!CONSTANT_P (value))
/* Parse phase is supposed to make VALUE's data type
(value1, maxmode)))
value1 = force_reg (maxmode, value1);
- pat = gen_insv (xop0,
- gen_rtx (CONST_INT, VOIDmode, bitsize),
- gen_rtx (CONST_INT, VOIDmode, xbitpos),
- value1);
+ pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
if (pat)
emit_insn (pat);
else
if (GET_CODE (value) == CONST_INT)
{
- register int v = INTVAL (value);
+ register HOST_WIDE_INT v = INTVAL (value);
- if (bitsize < HOST_BITS_PER_INT)
- v &= (1 << bitsize) - 1;
+ if (bitsize < HOST_BITS_PER_WIDE_INT)
+ v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
if (v == 0)
all_zero = 1;
- else if ((bitsize < HOST_BITS_PER_INT && v == (1 << bitsize) - 1)
- || (bitsize == HOST_BITS_PER_INT && v == -1))
+ else if ((bitsize < HOST_BITS_PER_WIDE_INT
+ && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
+ || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
all_one = 1;
value = lshift_value (mode, value, bitpos, bitsize);
if (must_and)
value = expand_binop (mode, and_optab, value,
mask_rtx (mode, 0, bitsize, 0),
- 0, 1, OPTAB_LIB_WIDEN);
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
if (bitpos > 0)
value = expand_shift (LSHIFT_EXPR, mode, value,
- build_int_2 (bitpos, 0), 0, 1);
+ build_int_2 (bitpos, 0), NULL_RTX, 1);
}
/* Now clear the chosen bits in OP0,
if (GET_MODE (value) != VOIDmode)
value = convert_to_mode (word_mode, value, 1);
+
+ if (GET_CODE (value) == CONST_DOUBLE
+ && (part1 = gen_lowpart_common (word_mode, value)) != 0)
+ value = part1;
+
if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
- value = copy_to_reg (value);
+ value = copy_to_mode_reg (word_mode, value);
/* Split the value into two parts:
PART1 gets that which goes in the first word; PART2 the other. */
/* PART1 gets the more significant part. */
if (GET_CODE (value) == CONST_INT)
{
- part1 = gen_rtx (CONST_INT, VOIDmode,
- (unsigned) (INTVAL (value)) >> bitsize_2);
- part2 = gen_rtx (CONST_INT, VOIDmode,
- (unsigned) (INTVAL (value)) & ((1 << bitsize_2) - 1));
+ part1 = GEN_INT ((unsigned HOST_WIDE_INT) (INTVAL (value)) >> bitsize_2);
+ part2 = GEN_INT ((unsigned HOST_WIDE_INT) (INTVAL (value))
+ & (((HOST_WIDE_INT) 1 << bitsize_2) - 1));
}
else
{
part1 = extract_fixed_bit_field (word_mode, value, 0, bitsize_1,
- BITS_PER_WORD - bitsize, 0, 1,
+ BITS_PER_WORD - bitsize, NULL_RTX, 1,
BITS_PER_WORD);
part2 = extract_fixed_bit_field (word_mode, value, 0, bitsize_2,
- BITS_PER_WORD - bitsize_2, 0, 1,
+ BITS_PER_WORD - bitsize_2, NULL_RTX, 1,
BITS_PER_WORD);
}
#else
/* PART1 gets the less significant part. */
if (GET_CODE (value) == CONST_INT)
{
- part1 = gen_rtx (CONST_INT, VOIDmode,
- (unsigned) (INTVAL (value)) & ((1 << bitsize_1) - 1));
- part2 = gen_rtx (CONST_INT, VOIDmode,
- (unsigned) (INTVAL (value)) >> bitsize_1);
+ part1 = GEN_INT ((unsigned HOST_WIDE_INT) (INTVAL (value))
+ & (((HOST_WIDE_INT) 1 << bitsize_1) - 1));
+ part2 = GEN_INT ((unsigned HOST_WIDE_INT) (INTVAL (value)) >> bitsize_1);
}
else
{
part1 = extract_fixed_bit_field (word_mode, value, 0, bitsize_1, 0,
- 0, 1, BITS_PER_WORD);
+ NULL_RTX, 1, BITS_PER_WORD);
part2 = extract_fixed_bit_field (word_mode, value, 0, bitsize_2,
- bitsize_1, 0, 1, BITS_PER_WORD);
+ bitsize_1, NULL_RTX, 1, BITS_PER_WORD);
}
#endif
if (tmode == VOIDmode)
tmode = mode;
-
while (GET_CODE (op0) == SUBREG)
{
offset += SUBREG_WORD (op0);
> GET_MODE_SIZE (maxmode)))
bestmode = get_best_mode (bitsize, bitnum,
align * BITS_PER_UNIT, maxmode,
- (GET_CODE (xop0) == MEM
- && MEM_VOLATILE_P (xop0)));
+ MEM_VOLATILE_P (xop0));
else
bestmode = GET_MODE (xop0);
if (GET_MODE (xtarget) != maxmode)
{
if (GET_CODE (xtarget) == REG)
- xspec_target_subreg = xtarget = gen_lowpart (maxmode, xtarget);
+ {
+ int wider = (GET_MODE_SIZE (maxmode)
+ > GET_MODE_SIZE (GET_MODE (xtarget)));
+ xtarget = gen_lowpart (maxmode, xtarget);
+ if (wider)
+ xspec_target_subreg = xtarget;
+ }
else
xtarget = gen_reg_rtx (maxmode);
}
(xtarget, maxmode)))
xtarget = gen_reg_rtx (maxmode);
- bitsize_rtx = gen_rtx (CONST_INT, VOIDmode, bitsize);
- bitpos_rtx = gen_rtx (CONST_INT, VOIDmode, xbitpos);
+ bitsize_rtx = GEN_INT (bitsize);
+ bitpos_rtx = GEN_INT (xbitpos);
pat = gen_extzv (protect_from_queue (xtarget, 1),
xop0, bitsize_rtx, bitpos_rtx);
> GET_MODE_SIZE (maxmode)))
bestmode = get_best_mode (bitsize, bitnum,
align * BITS_PER_UNIT, maxmode,
- (GET_CODE (xop0) == MEM
- && MEM_VOLATILE_P (xop0)));
+ MEM_VOLATILE_P (xop0));
else
bestmode = GET_MODE (xop0);
if (GET_MODE (xtarget) != maxmode)
{
if (GET_CODE (xtarget) == REG)
- xspec_target_subreg = xtarget = gen_lowpart (maxmode, xtarget);
+ {
+ int wider = (GET_MODE_SIZE (maxmode)
+ > GET_MODE_SIZE (GET_MODE (xtarget)));
+ xtarget = gen_lowpart (maxmode, xtarget);
+ if (wider)
+ xspec_target_subreg = xtarget;
+ }
else
xtarget = gen_reg_rtx (maxmode);
}
(xtarget, maxmode)))
xtarget = gen_reg_rtx (maxmode);
- bitsize_rtx = gen_rtx (CONST_INT, VOIDmode, bitsize);
- bitpos_rtx = gen_rtx (CONST_INT, VOIDmode, xbitpos);
+ bitsize_rtx = GEN_INT (bitsize);
+ bitpos_rtx = GEN_INT (xbitpos);
pat = gen_extv (protect_from_queue (xtarget, 1),
xop0, bitsize_rtx, bitpos_rtx);
total_bits = GET_MODE_BITSIZE (mode);
+ /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
+     be in the range 0 to total_bits-1, and put any excess bytes in
+ OFFSET. */
+ if (bitpos >= total_bits)
+ {
+ offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
+ bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
+ * BITS_PER_UNIT);
+ }
+
/* Get ref to an aligned byte, halfword, or word containing the field.
Adjust BITPOS to be position within a word,
and OFFSET to be the offset of that word.
enum machine_mode mode;
int bitpos, bitsize, complement;
{
- int masklow, maskhigh;
+ HOST_WIDE_INT masklow, maskhigh;
- if (bitpos < HOST_BITS_PER_INT)
- masklow = -1 << bitpos;
+ if (bitpos < HOST_BITS_PER_WIDE_INT)
+ masklow = (HOST_WIDE_INT) -1 << bitpos;
else
masklow = 0;
- if (bitpos + bitsize < HOST_BITS_PER_INT)
- masklow &= (unsigned) -1 >> (HOST_BITS_PER_INT - bitpos - bitsize);
+ if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
+ masklow &= ((unsigned HOST_WIDE_INT) -1
+ >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
- if (bitpos <= HOST_BITS_PER_INT)
+ if (bitpos <= HOST_BITS_PER_WIDE_INT)
maskhigh = -1;
else
- maskhigh = -1 << (bitpos - HOST_BITS_PER_INT);
+ maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
- if (bitpos + bitsize > HOST_BITS_PER_INT)
- maskhigh &= (unsigned) -1 >> (2 * HOST_BITS_PER_INT - bitpos - bitsize);
+ if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
+ maskhigh &= ((unsigned HOST_WIDE_INT) -1
+ >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
else
maskhigh = 0;
rtx value;
int bitpos, bitsize;
{
- unsigned v = INTVAL (value);
- int low, high;
+ unsigned HOST_WIDE_INT v = INTVAL (value);
+ HOST_WIDE_INT low, high;
- if (bitsize < HOST_BITS_PER_INT)
- v &= ~(-1 << bitsize);
+ if (bitsize < HOST_BITS_PER_WIDE_INT)
+ v &= ~((HOST_WIDE_INT) -1 << bitsize);
- if (bitpos < HOST_BITS_PER_INT)
+ if (bitpos < HOST_BITS_PER_WIDE_INT)
{
low = v << bitpos;
- high = (bitpos > 0 ? (v >> (HOST_BITS_PER_INT - bitpos)) : 0);
+ high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
}
else
{
low = 0;
- high = v << (bitpos - HOST_BITS_PER_INT);
+ high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
}
return immed_double_const (low, high, mode);
: operand_subword_force (op0, offset, GET_MODE (op0)));
part1 = extract_fixed_bit_field (word_mode, word,
GET_CODE (op0) == MEM ? offset : 0,
- bitsize_1, bitpos % unit, 0, 1, align);
+ bitsize_1, bitpos % unit, NULL_RTX,
+ 1, align);
/* Offset op0 by 1 word to get to the following one. */
if (GET_CODE (op0) == SUBREG)
(GET_CODE (op0) == MEM
? CEIL (offset + 1, UNITS_PER_WORD) * UNITS_PER_WORD
: 0),
- bitsize_2, 0, 0, 1, align);
+ bitsize_2, 0, NULL_RTX, 1, align);
/* Shift the more significant part up to fit above the other part. */
#if BYTES_BIG_ENDIAN
/* Combine the two parts with bitwise or. This works
because we extracted both parts as unsigned bit fields. */
- result = expand_binop (word_mode, ior_optab, part1, part2, 0, 1,
+ result = expand_binop (word_mode, ior_optab, part1, part2, NULL_RTX, 1,
OPTAB_LIB_WIDEN);
/* Unsigned bit field: we are done. */
return result;
/* Signed bit field: sign-extend with two arithmetic shifts. */
result = expand_shift (LSHIFT_EXPR, word_mode, result,
- build_int_2 (BITS_PER_WORD - bitsize, 0), 0, 0);
+ build_int_2 (BITS_PER_WORD - bitsize, 0),
+ NULL_RTX, 0);
return expand_shift (RSHIFT_EXPR, word_mode, result,
- build_int_2 (BITS_PER_WORD - bitsize, 0), 0, 0);
+ build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
}
\f
/* Add INC into TARGET. */
and shifted in the other direction; but that does not work
on all machines. */
- op1 = expand_expr (amount, 0, VOIDmode, 0);
+ op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
if (op1 == const0_rtx)
return shifted;
if (methods == OPTAB_WIDEN)
continue;
else if (methods == OPTAB_LIB_WIDEN)
- methods = OPTAB_LIB;
+ {
+ /* If we are rotating by a constant that is valid and
+ we have been unable to open-code this by a rotation,
+ do it as the IOR of two shifts. I.e., to rotate A
+ by N bits, compute (A << N) | ((unsigned) A >> (C - N))
+ where C is the bitsize of A.
+
+ It is theoretically possible that the target machine might
+ not be able to perform either shift and hence we would
+ be making two libcalls rather than just the one for the
+ shift (similarly if IOR could not be done). We will allow
+ this extremely unlikely lossage to avoid complicating the
+ code below. */
+
+ if (GET_CODE (op1) == CONST_INT && INTVAL (op1) > 0
+ && INTVAL (op1) < GET_MODE_BITSIZE (mode))
+ {
+ rtx subtarget = target == shifted ? 0 : target;
+ rtx temp1;
+ tree other_amount
+ = build_int_2 (GET_MODE_BITSIZE (mode) - INTVAL (op1), 0);
+
+ shifted = force_reg (mode, shifted);
+
+ temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
+ mode, shifted, amount, subtarget, 1);
+ temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
+ mode, shifted, other_amount, 0, 1);
+ return expand_binop (mode, ior_optab, temp, temp1, target,
+ unsignedp, methods);
+ }
+ else
+ methods = OPTAB_LIB;
+ }
temp = expand_binop (mode,
left ? rotl_optab : rotr_optab,
shifted, op1, target, unsignedp, methods);
+
+ /* If we don't have the rotate, but we are rotating by a constant
+ that is in range, try a rotate in the opposite direction. */
+
+ if (temp == 0 && GET_CODE (op1) == CONST_INT
+ && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
+ temp = expand_binop (mode,
+ left ? rotr_optab : rotl_optab,
+ shifted,
+ GEN_INT (GET_MODE_BITSIZE (mode)
+ - INTVAL (op1)),
+ target, unsignedp, methods);
}
else if (unsignedp)
{
|| (methods == OPTAB_WIDEN
&& GET_MODE_SIZE (mode) < GET_MODE_SIZE (output_mode)))
{
- /* Note convert_to_mode does protect_from_queue. */
- rtx shifted1 = convert_to_mode (output_mode, shifted, 1);
+ rtx shifted1 = convert_to_mode (output_mode,
+ protect_from_queue (shifted, 0),
+ 1);
enum machine_mode length_mode
= insn_operand_mode[(int) CODE_FOR_extzv][2];
enum machine_mode pos_mode
(target1, output_mode)))
target1 = gen_reg_rtx (output_mode);
+ xop1 = protect_from_queue (xop1, 0);
xop1 = convert_to_mode (pos_mode, xop1,
TREE_UNSIGNED (TREE_TYPE (amount)));
/* WIDTH gets the width of the bit field to extract:
wordsize minus # bits to shift by. */
if (GET_CODE (xop1) == CONST_INT)
- width = gen_rtx (CONST_INT, VOIDmode,
- (GET_MODE_BITSIZE (mode) - INTVAL (op1)));
+ width = GEN_INT (GET_MODE_BITSIZE (mode) - INTVAL (op1));
else
{
/* Now get the width in the proper mode. */
+ op1 = protect_from_queue (op1, 0);
width = convert_to_mode (length_mode, op1,
TREE_UNSIGNED (TREE_TYPE (amount)));
width = expand_binop (length_mode, sub_optab,
- gen_rtx (CONST_INT, VOIDmode,
- GET_MODE_BITSIZE (mode)),
- width, 0, 0, OPTAB_LIB_WIDEN);
+ GEN_INT (GET_MODE_BITSIZE (mode)),
+ width, NULL_RTX, 0, OPTAB_LIB_WIDEN);
}
/* If this machine's extzv insists on a register for
return temp;
}
\f
-enum alg_code { alg_add, alg_subtract, alg_compound };
+enum alg_code { alg_zero, alg_m, alg_shift,
+ alg_add_t_m2, alg_sub_t_m2,
+ alg_add_factor, alg_sub_factor,
+ alg_add_t2_m, alg_sub_t2_m,
+ alg_add, alg_subtract, alg_factor, alg_shiftop };
/* This structure records a sequence of operations.
`ops' is the number of operations recorded.
`cost' is their total cost.
The operations are stored in `op' and the corresponding
- integer coefficients in `coeff'.
- These are the operations:
- alg_add Add to the total the multiplicand times the coefficient.
- alg_subtract Subtract the multiplicand times the coefficient.
- alg_compound This coefficient plus or minus the following one
- is multiplied into the total. The following operation
- is alg_add or alg_subtract to indicate whether to add
- or subtract the two coefficients. */
+ logarithms of the integer coefficients in `log'.
-#ifndef MAX_BITS_PER_WORD
-#define MAX_BITS_PER_WORD BITS_PER_WORD
-#endif
+ These are the operations:
+ alg_zero total := 0;
+ alg_m total := multiplicand;
+ alg_shift total := total * coeff
+ alg_add_t_m2 total := total + multiplicand * coeff;
+ alg_sub_t_m2 total := total - multiplicand * coeff;
+ alg_add_factor total := total * coeff + total;
+ alg_sub_factor total := total * coeff - total;
+ alg_add_t2_m total := total * coeff + multiplicand;
+ alg_sub_t2_m total := total * coeff - multiplicand;
+
+ The first operand must be either alg_zero or alg_m. */
struct algorithm
{
- int cost;
- unsigned int ops;
+ short cost;
+ short ops;
+ /* The size of the OP and LOG fields are not directly related to the
+ word size, but the worst-case algorithms will be if we have few
+ consecutive ones or zeros, i.e., a multiplicand like 10101010101...
+ In that case we will generate shift-by-2, add, shift-by-2, add,...,
+ in total wordsize operations. */
enum alg_code op[MAX_BITS_PER_WORD];
- unsigned int coeff[MAX_BITS_PER_WORD];
+ char log[MAX_BITS_PER_WORD];
};
/* Compute and return the best algorithm for multiplying by T.
- Assume that add insns cost ADD_COST and shifts cost SHIFT_COST.
- Return cost -1 if would cost more than MAX_COST. */
+   The algorithm must cost less than cost_limit.
+   If retval.cost >= COST_LIMIT, no algorithm was found and all
+   other fields of the returned struct are undefined.  */
static struct algorithm
-synth_mult (t, add_cost, shift_cost, max_cost)
- unsigned int t;
- int add_cost, shift_cost;
- int max_cost;
+synth_mult (t, cost_limit)
+ unsigned HOST_WIDE_INT t;
+ int cost_limit;
{
- int m, n;
- struct algorithm *best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
- struct algorithm *alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
+ int m;
+ struct algorithm *best_alg
+ = (struct algorithm *)alloca (sizeof (struct algorithm));
+ struct algorithm *alg_in
+ = (struct algorithm *)alloca (sizeof (struct algorithm));
unsigned int cost;
+ unsigned HOST_WIDE_INT q;
- /* No matter what happens, we want to return a valid algorithm. */
- best_alg->cost = max_cost;
- best_alg->ops = 0;
+ /* Indicate that no algorithm is yet found. If no algorithm
+ is found, this value will be returned and indicate failure. */
+ best_alg->cost = cost_limit;
- /* Is t an exponent of 2, so we can just do a shift? */
+ if (cost_limit <= 0)
+ return *best_alg;
- if ((t & -t) == t)
+ /* t == 1 can be done in zero cost. */
+ if (t == 1)
{
- if (t > 1)
- {
- if (max_cost >= shift_cost)
- {
- best_alg->cost = shift_cost;
- best_alg->ops = 1;
- best_alg->op[0] = alg_add;
- best_alg->coeff[0] = t;
- }
- else
- best_alg->cost = -1;
- }
- else if (t == 1)
- {
- if (max_cost >= 0)
- best_alg->cost = 0;
- }
- else
- best_alg->cost = 0;
-
+ best_alg->ops = 1;
+ best_alg->cost = 0;
+ best_alg->op[0] = alg_m;
return *best_alg;
}
- /* If MAX_COST just permits as little as an addition (or less), we won't
- succeed in synthesizing an algorithm for t. Return immediately with
- an indication of failure. */
- if (max_cost <= add_cost)
- {
- best_alg->cost = -1;
- return *best_alg;
- }
+ /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
+ fail now. */
- /* Look for factors of t of the form
- t = q(2**m +- 1), 2 <= m <= floor(log2(t)) - 1.
- If we find such a factor, we can multiply by t using an algorithm that
- multiplies by q, shift the result by m and add/subtract it to itself. */
-
- for (m = floor_log2 (t) - 1; m >= 2; m--)
+ else if (t == 0)
{
- int m_exp_2 = 1 << m;
- int d;
-
- d = m_exp_2 + 1;
- if (t % d == 0)
+ if (zero_cost >= cost_limit)
+ return *best_alg;
+ else
{
- int q = t / d;
-
- cost = add_cost + shift_cost * 2;
-
- *alg_in = synth_mult (q, add_cost, shift_cost,
- MIN (max_cost, best_alg->cost) - cost);
-
- if (alg_in->cost >= 0)
- {
- cost += alg_in->cost;
-
- if (cost < best_alg->cost)
- {
- struct algorithm *x;
- x = alg_in;
- alg_in = best_alg;
- best_alg = x;
- best_alg->coeff[best_alg->ops] = m_exp_2;
- best_alg->op[best_alg->ops++] = alg_compound;
- best_alg->coeff[best_alg->ops] = 1;
- best_alg->op[best_alg->ops++] = alg_add;
- best_alg->cost = cost;
- }
- }
+ best_alg->ops = 1;
+ best_alg->cost = zero_cost;
+ best_alg->op[0] = alg_zero;
+ return *best_alg;
}
+ }
- d = m_exp_2 - 1;
- if (t % d == 0)
- {
- int q = t / d;
-
- cost = add_cost + shift_cost * 2;
+ /* If we have a group of zero bits at the low-order part of T, try
+ multiplying by the remaining bits and then doing a shift. */
- *alg_in = synth_mult (q, add_cost, shift_cost,
- MIN (max_cost, best_alg->cost) - cost);
+ if ((t & 1) == 0)
+ {
+ m = floor_log2 (t & -t); /* m = number of low zero bits */
+ q = t >> m;
+ cost = shift_cost[m];
+ if (cost < cost_limit)
+ {
+ *alg_in = synth_mult (q, cost_limit - cost);
- if (alg_in->cost >= 0)
+ cost += alg_in->cost;
+ if (cost < best_alg->cost)
{
- cost += alg_in->cost;
-
- if (cost < best_alg->cost)
- {
- struct algorithm *x;
- x = alg_in;
- alg_in = best_alg;
- best_alg = x;
- best_alg->coeff[best_alg->ops] = m_exp_2;
- best_alg->op[best_alg->ops++] = alg_compound;
- best_alg->coeff[best_alg->ops] = 1;
- best_alg->op[best_alg->ops++] = alg_subtract;
- best_alg->cost = cost;
- }
+ struct algorithm *x;
+ x = alg_in, alg_in = best_alg, best_alg = x;
+ best_alg->log[best_alg->ops] = m;
+ best_alg->op[best_alg->ops++] = alg_shift;
+ best_alg->cost = cost_limit = cost;
}
}
}
- /* Try load effective address instructions, i.e. do a*3, a*5, a*9. */
-
- {
- int q;
- int w;
-
- q = t & -t; /* get out lsb */
- w = (t - q) & -(t - q); /* get out next lsb */
-
- if (w / q <= lea_max_mul)
- {
- cost = lea_cost + (q != 1 ? shift_cost : 0);
-
- *alg_in = synth_mult (t - q - w, add_cost, shift_cost,
- MIN (max_cost, best_alg->cost) - cost);
-
- if (alg_in->cost >= 0)
- {
- cost += alg_in->cost;
-
- /* Use <= to prefer this method to the factoring method
- when the cost appears the same, because this method
- uses fewer temporary registers. */
- if (cost <= best_alg->cost)
- {
- struct algorithm *x;
- x = alg_in;
- alg_in = best_alg;
- best_alg = x;
- best_alg->coeff[best_alg->ops] = w;
- best_alg->op[best_alg->ops++] = alg_add;
- best_alg->coeff[best_alg->ops] = q;
- best_alg->op[best_alg->ops++] = alg_add;
- best_alg->cost = cost;
- }
- }
- }
- }
-
- /* Now, use the good old method to add or subtract at the leftmost
- 1-bit. */
-
+ /* If we have an odd number, add or subtract one. */
+ if ((t & 1) != 0)
{
- int q;
- int w;
+ unsigned HOST_WIDE_INT w;
- q = t & -t; /* get out lsb */
- for (w = q; (w & t) != 0; w <<= 1)
+ for (w = 1; (w & t) != 0; w <<= 1)
;
- if ((w > q << 1)
- /* Reject the case where t has only two bits.
+ if (w > 2
+ /* Reject the case where t is 3.
Thus we prefer addition in that case. */
- && !(t < w && w == q << 2))
+ && t != 3)
{
- /* There are many bits in a row. Make 'em by subtraction. */
+ /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
cost = add_cost;
- if (q != 1)
- cost += shift_cost;
-
- *alg_in = synth_mult (t + q, add_cost, shift_cost,
- MIN (max_cost, best_alg->cost) - cost);
+ *alg_in = synth_mult (t + 1, cost_limit - cost);
- if (alg_in->cost >= 0)
+ cost += alg_in->cost;
+ if (cost < best_alg->cost)
{
- cost += alg_in->cost;
-
- /* Use <= to prefer this method to the factoring method
- when the cost appears the same, because this method
- uses fewer temporary registers. */
- if (cost <= best_alg->cost)
- {
- struct algorithm *x;
- x = alg_in;
- alg_in = best_alg;
- best_alg = x;
- best_alg->coeff[best_alg->ops] = q;
- best_alg->op[best_alg->ops++] = alg_subtract;
- best_alg->cost = cost;
- }
+ struct algorithm *x;
+ x = alg_in, alg_in = best_alg, best_alg = x;
+ best_alg->log[best_alg->ops] = 0;
+ best_alg->op[best_alg->ops++] = alg_sub_t_m2;
+ best_alg->cost = cost_limit = cost;
}
}
else
{
- /* There's only one bit at the left. Make it by addition. */
+ /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
cost = add_cost;
- if (q != 1)
- cost += shift_cost;
+ *alg_in = synth_mult (t - 1, cost_limit - cost);
- *alg_in = synth_mult (t - q, add_cost, shift_cost,
- MIN (max_cost, best_alg->cost) - cost);
-
- if (alg_in->cost >= 0)
+ cost += alg_in->cost;
+ if (cost < best_alg->cost)
{
- cost += alg_in->cost;
-
- if (cost <= best_alg->cost)
- {
- struct algorithm *x;
- x = alg_in;
- alg_in = best_alg;
- best_alg = x;
- best_alg->coeff[best_alg->ops] = q;
- best_alg->op[best_alg->ops++] = alg_add;
- best_alg->cost = cost;
- }
+ struct algorithm *x;
+ x = alg_in, alg_in = best_alg, best_alg = x;
+ best_alg->log[best_alg->ops] = 0;
+ best_alg->op[best_alg->ops++] = alg_add_t_m2;
+ best_alg->cost = cost_limit = cost;
}
}
}
- if (best_alg->cost >= max_cost)
- best_alg->cost = -1;
+ /* Look for factors of t of the form
+ t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
+ If we find such a factor, we can multiply by t using an algorithm that
+ multiplies by q, shift the result by m and add/subtract it to itself.
+
+ We search for large factors first and loop down, even if large factors
+ are less probable than small; if we find a large factor we will find a
+ good sequence quickly, and therefore be able to prune (by decreasing
+ COST_LIMIT) the search. */
+
+ for (m = floor_log2 (t - 1); m >= 2; m--)
+ {
+ unsigned HOST_WIDE_INT d;
+
+ d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
+ if (t % d == 0 && t > d)
+ {
+ cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
+ *alg_in = synth_mult (t / d, cost_limit - cost);
+
+ cost += alg_in->cost;
+ if (cost < best_alg->cost)
+ {
+ struct algorithm *x;
+ x = alg_in, alg_in = best_alg, best_alg = x;
+ best_alg->log[best_alg->ops] = m;
+ best_alg->op[best_alg->ops++] = alg_add_factor;
+ best_alg->cost = cost_limit = cost;
+ }
+ }
+
+ d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
+ if (t % d == 0 && t > d)
+ {
+ cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
+ *alg_in = synth_mult (t / d, cost_limit - cost);
+
+ cost += alg_in->cost;
+ if (cost < best_alg->cost)
+ {
+ struct algorithm *x;
+ x = alg_in, alg_in = best_alg, best_alg = x;
+ best_alg->log[best_alg->ops] = m;
+ best_alg->op[best_alg->ops++] = alg_sub_factor;
+ best_alg->cost = cost_limit = cost;
+ }
+ }
+ }
+
+ /* Try shift-and-add (load effective address) instructions,
+ i.e. do a*3, a*5, a*9. */
+ if ((t & 1) != 0)
+ {
+ q = t - 1;
+ q = q & -q;
+ m = exact_log2 (q);
+ if (m >= 0)
+ {
+ cost = shiftadd_cost[m];
+ *alg_in = synth_mult ((t - 1) >> m, cost_limit - cost);
+
+ cost += alg_in->cost;
+ if (cost < best_alg->cost)
+ {
+ struct algorithm *x;
+ x = alg_in, alg_in = best_alg, best_alg = x;
+ best_alg->log[best_alg->ops] = m;
+ best_alg->op[best_alg->ops++] = alg_add_t2_m;
+ best_alg->cost = cost_limit = cost;
+ }
+ }
+
+ q = t + 1;
+ q = q & -q;
+ m = exact_log2 (q);
+ if (m >= 0)
+ {
+ cost = shiftsub_cost[m];
+ *alg_in = synth_mult ((t + 1) >> m, cost_limit - cost);
+
+ cost += alg_in->cost;
+ if (cost < best_alg->cost)
+ {
+ struct algorithm *x;
+ x = alg_in, alg_in = best_alg, best_alg = x;
+ best_alg->log[best_alg->ops] = m;
+ best_alg->op[best_alg->ops++] = alg_sub_t2_m;
+ best_alg->cost = cost_limit = cost;
+ }
+ }
+ }
+
+ /* If we are getting a too long sequence for `struct algorithm'
+ to record, store a fake cost to make this search fail. */
+ if (best_alg->ops == MAX_BITS_PER_WORD)
+ best_alg->cost = cost_limit;
+
return *best_alg;
}
\f
{
if ((CONST_DOUBLE_HIGH (op1) == 0 && CONST_DOUBLE_LOW (op1) >= 0)
|| (CONST_DOUBLE_HIGH (op1) == -1 && CONST_DOUBLE_LOW (op1) < 0))
- const_op1 = gen_rtx (CONST_INT, VOIDmode, CONST_DOUBLE_LOW (op1));
+ const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
}
- if (GET_CODE (const_op1) == CONST_INT && ! mult_is_very_cheap && optimize)
+ /* We used to test optimize here, on the grounds that it's better to
+ produce a smaller program when -O is not used.
+ But this causes such a terrible slowdown sometimes
+ that it seems better to use synth_mult always. */
+
+ if (GET_CODE (const_op1) == CONST_INT && ! mult_is_very_cheap)
{
struct algorithm alg;
struct algorithm neg_alg;
int negate = 0;
- int absval = INTVAL (op1);
- rtx last;
+ HOST_WIDE_INT val = INTVAL (op1);
+ HOST_WIDE_INT val_so_far;
+ rtx insn;
/* Try to do the computation two ways: multiply by the negative of OP1
and then negate, or do the multiplication directly. The latter is
has a factor of 2**m +/- 1, while the negated value does not or
vice versa. */
- alg = synth_mult (absval, add_cost, shift_cost, mult_cost);
- neg_alg = synth_mult (- absval, add_cost, shift_cost,
- mult_cost - negate_cost);
+ alg = synth_mult (val, mult_cost);
+ neg_alg = synth_mult (- val,
+ (alg.cost < mult_cost ? alg.cost : mult_cost)
+ - negate_cost);
- if (neg_alg.cost >= 0 && neg_alg.cost + negate_cost < alg.cost)
- alg = neg_alg, negate = 1, absval = - absval;
+ if (neg_alg.cost + negate_cost < alg.cost)
+ alg = neg_alg, negate = 1;
- if (alg.cost >= 0)
+ if (alg.cost < mult_cost)
{
- /* If we found something, it must be cheaper than multiply.
- So use it. */
- int opno = 0;
+ /* We found something cheaper than a multiply insn. */
+ int opno;
rtx accum, tem;
- int factors_seen = 0;
op0 = protect_from_queue (op0, 0);
if (GET_CODE (op0) == MEM)
op0 = force_reg (mode, op0);
- if (alg.ops == 0)
- accum = copy_to_mode_reg (mode, op0);
- else
+ /* ACCUM starts out either as OP0 or as a zero, depending on
+ the first operation. */
+
+ if (alg.op[0] == alg_zero)
{
- /* 1 if this is the last in a series of adds and subtracts. */
- int last = (1 == alg.ops || alg.op[1] == alg_compound);
- int log = floor_log2 (alg.coeff[0]);
- if (! factors_seen && ! last)
- log -= floor_log2 (alg.coeff[1]);
-
- if (alg.op[0] != alg_add)
- abort ();
- accum = expand_shift (LSHIFT_EXPR, mode, op0,
- build_int_2 (log, 0),
- 0, 0);
+ accum = copy_to_mode_reg (mode, const0_rtx);
+ val_so_far = 0;
}
-
- while (++opno < alg.ops)
+ else if (alg.op[0] == alg_m)
{
- int log = floor_log2 (alg.coeff[opno]);
- /* 1 if this is the last in a series of adds and subtracts. */
- int last = (opno + 1 == alg.ops
- || alg.op[opno + 1] == alg_compound);
-
- /* If we have not yet seen any separate factors (alg_compound)
- then turn op0<<a1 + op0<<a2 + op0<<a3... into
- (op0<<(a1-a2) + op0)<<(a2-a3) + op0... */
+ accum = copy_to_mode_reg (mode, op0);
+ val_so_far = 1;
+ }
+ else
+ abort ();
+
+ for (opno = 1; opno < alg.ops; opno++)
+ {
+ int log = alg.log[opno];
+ rtx shift_subtarget = preserve_subexpressions_p () ? 0 : accum;
+ rtx add_target = opno == alg.ops - 1 && target != 0 ? target : 0;
+
switch (alg.op[opno])
{
- case alg_add:
- if (factors_seen)
- {
- tem = expand_shift (LSHIFT_EXPR, mode, op0,
- build_int_2 (log, 0), 0, 0);
- accum = force_operand (gen_rtx (PLUS, mode, accum, tem),
- accum);
- }
- else
- {
- if (! last)
- log -= floor_log2 (alg.coeff[opno + 1]);
- accum = force_operand (gen_rtx (PLUS, mode, accum, op0),
- accum);
- accum = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), accum, 0);
- }
+ case alg_shift:
+ accum = expand_shift (LSHIFT_EXPR, mode, accum,
+ build_int_2 (log, 0), NULL_RTX, 0);
+ val_so_far <<= log;
break;
- case alg_subtract:
- if (factors_seen)
- {
- tem = expand_shift (LSHIFT_EXPR, mode, op0,
- build_int_2 (log, 0), 0, 0);
- accum = force_operand (gen_rtx (MINUS, mode, accum, tem),
- accum);
- }
- else
- {
- if (! last)
- log -= floor_log2 (alg.coeff[opno + 1]);
- accum = force_operand (gen_rtx (MINUS, mode, accum, op0),
- accum);
- accum = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), accum, 0);
- }
+ case alg_add_t_m2:
+ tem = expand_shift (LSHIFT_EXPR, mode, op0,
+ build_int_2 (log, 0), NULL_RTX, 0);
+ accum = force_operand (gen_rtx (PLUS, mode, accum, tem),
+ add_target ? add_target : accum);
+ val_so_far += (HOST_WIDE_INT) 1 << log;
+ break;
+ case alg_sub_t_m2:
+ tem = expand_shift (LSHIFT_EXPR, mode, op0,
+ build_int_2 (log, 0), NULL_RTX, 0);
+ accum = force_operand (gen_rtx (MINUS, mode, accum, tem),
+ add_target ? add_target : accum);
+ val_so_far -= (HOST_WIDE_INT) 1 << log;
break;
- case alg_compound:
- factors_seen = 1;
- tem = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), 0, 0);
+ case alg_add_t2_m:
+ accum = expand_shift (LSHIFT_EXPR, mode, accum,
+ build_int_2 (log, 0), accum, 0);
+ accum = force_operand (gen_rtx (PLUS, mode, accum, op0),
+ add_target ? add_target : accum);
+ val_so_far = (val_so_far << log) + 1;
+ break;
- log = floor_log2 (alg.coeff[opno + 1]);
+ case alg_sub_t2_m:
accum = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), 0, 0);
- opno++;
- if (alg.op[opno] == alg_add)
- accum = force_operand (gen_rtx (PLUS, mode, tem, accum),
- tem);
- else
- accum = force_operand (gen_rtx (MINUS, mode, tem, accum),
- tem);
- }
- }
+ build_int_2 (log, 0), accum, 0);
+ accum = force_operand (gen_rtx (MINUS, mode, accum, op0),
+ add_target ? add_target : accum);
+ val_so_far = (val_so_far << log) - 1;
+ break;
- /* Write a REG_EQUAL note on the last insn so that we can cse
- multiplication sequences. We need not do this if we were
- multiplying by a power of two, since only one insn would have
- been generated.
+ case alg_add_factor:
+ tem = expand_shift (LSHIFT_EXPR, mode, accum,
+ build_int_2 (log, 0), NULL_RTX, 0);
+ accum = force_operand (gen_rtx (PLUS, mode, accum, tem),
+ add_target ? add_target : accum);
+ val_so_far += val_so_far << log;
+ break;
- ??? We could also write REG_EQUAL notes on the last insn of
- each sequence that uses a single temporary, but it is not
- clear how to calculate the partial product so far.
+ case alg_sub_factor:
+ tem = expand_shift (LSHIFT_EXPR, mode, accum,
+ build_int_2 (log, 0), NULL_RTX, 0);
+ accum = force_operand (gen_rtx (MINUS, mode, tem, accum),
+ add_target ? add_target : tem);
+ val_so_far = (val_so_far << log) - val_so_far;
+ break;
- Torbjorn: Can you do this? */
+ default:
+ abort ();;
+ }
- if (exact_log2 (absval) < 0)
- {
- last = get_last_insn ();
- REG_NOTES (last)
+ /* Write a REG_EQUAL note on the last insn so that we can cse
+ multiplication sequences. */
+
+ insn = get_last_insn ();
+ REG_NOTES (insn)
= gen_rtx (EXPR_LIST, REG_EQUAL,
- gen_rtx (MULT, mode, op0,
- negate ? gen_rtx (CONST_INT,
- VOIDmode, absval)
- : op1),
- REG_NOTES (last));
+ gen_rtx (MULT, mode, op0, GEN_INT (val_so_far)),
+ REG_NOTES (insn));
}
- return (negate ? expand_unop (mode, neg_optab, accum, target, 0)
- : accum);
+ if (negate)
+ {
+ val_so_far = - val_so_far;
+ accum = expand_unop (mode, neg_optab, accum, target, 0);
+ }
+
+ if (val != val_so_far)
+ abort ();
+
+ return accum;
}
}
/* This used to use umul_optab if unsigned,
- but I think that for non-widening multiply there is no difference
+ but for non-widening multiply there is no difference
between signed and unsigned. */
op0 = expand_binop (mode, smul_optab,
op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
register rtx result = 0;
enum machine_mode compute_mode;
int log = -1;
+ int size;
int can_clobber_op0;
int mod_insn_no_good = 0;
rtx adjusted_op0 = op0;
optab optab1, optab2;
+ /* We shouldn't be called with op1 == const1_rtx, but some of the
+ code below will malfunction if we are, so check here and handle
+ the special case if so. */
+ if (op1 == const1_rtx)
+ return rem_flag ? const0_rtx : op0;
+
/* Don't use the function value register as a target
since we have to read it as well as write it,
and function-inlining gets confused by this. */
if (compute_mode == VOIDmode)
compute_mode = mode;
+ size = GET_MODE_BITSIZE (compute_mode);
+
/* Now convert to the best mode to use. Show we made a copy of OP0
and hence we can clobber it (we cannot use a SUBREG to widen
something. */
op1 = convert_to_mode (compute_mode, op1, unsignedp);
}
+ /* If we are computing the remainder and one of the operands is a volatile
+ MEM, copy it into a register. */
+
+ if (rem_flag && GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
+ adjusted_op0 = op0 = force_reg (compute_mode, op0), can_clobber_op0 = 1;
+ if (rem_flag && GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
+ op1 = force_reg (compute_mode, op1);
+
+ /* If we are computing the remainder, op0 will be needed later to calculate
+ X - Y * (X / Y), therefore cannot be clobbered. */
+ if (rem_flag)
+ can_clobber_op0 = 0;
+
if (target == 0 || GET_MODE (target) != compute_mode)
target = gen_reg_rtx (compute_mode);
case TRUNC_DIV_EXPR:
if (log >= 0 && ! unsignedp)
{
- rtx label = gen_label_rtx ();
- if (! can_clobber_op0)
+ /* Here we need to add OP1-1 if OP0 is negative, 0 otherwise.
+ This can be computed without jumps by arithmetically shifting
+ OP0 right LOG-1 places and then shifting right logically
+ SIZE-LOG bits. The resulting value is unconditionally added
+ to OP0. */
+ if (log == 1 || BRANCH_COST >= 3)
{
- adjusted_op0 = copy_to_suggested_reg (adjusted_op0, target,
- compute_mode);
- /* Copy op0 to a reg, since emit_cmp_insn will call emit_queue
- which will screw up mem refs for autoincrements. */
- op0 = force_reg (compute_mode, op0);
+ rtx temp = gen_reg_rtx (compute_mode);
+ if (! can_clobber_op0)
+ /* Copy op0 to a reg, to play safe,
+ since this is done in the other path. */
+ op0 = force_reg (compute_mode, op0);
+ temp = copy_to_suggested_reg (adjusted_op0, temp, compute_mode);
+ temp = expand_shift (RSHIFT_EXPR, compute_mode, temp,
+ build_int_2 (log - 1, 0), NULL_RTX, 0);
+ temp = expand_shift (RSHIFT_EXPR, compute_mode, temp,
+ build_int_2 (size - log, 0),
+ temp, 1);
+ /* We supply 0 as the target to make a new pseudo
+ for the value; that helps loop.c optimize the result. */
+ adjusted_op0 = expand_binop (compute_mode, add_optab,
+ adjusted_op0, temp,
+ 0, 0, OPTAB_LIB_WIDEN);
+ }
+ else
+ {
+ rtx label = gen_label_rtx ();
+ if (! can_clobber_op0)
+ {
+ adjusted_op0 = copy_to_suggested_reg (adjusted_op0, target,
+ compute_mode);
+ /* Copy op0 to a reg, since emit_cmp_insn will call emit_queue
+ which will screw up mem refs for autoincrements. */
+ op0 = force_reg (compute_mode, op0);
+ }
+ emit_cmp_insn (adjusted_op0, const0_rtx, GE,
+ NULL_RTX, compute_mode, 0, 0);
+ emit_jump_insn (gen_bge (label));
+ expand_inc (adjusted_op0, plus_constant (op1, -1));
+ emit_label (label);
}
- emit_cmp_insn (adjusted_op0, const0_rtx, GE, 0, compute_mode, 0, 0);
- emit_jump_insn (gen_bge (label));
- expand_inc (adjusted_op0, plus_constant (op1, -1));
- emit_label (label);
mod_insn_no_good = 1;
}
break;
which will screw up mem refs for autoincrements. */
op0 = force_reg (compute_mode, op0);
}
- emit_cmp_insn (adjusted_op0, const0_rtx, GE, 0, compute_mode, 0, 0);
+ emit_cmp_insn (adjusted_op0, const0_rtx, GE,
+ NULL_RTX, compute_mode, 0, 0);
emit_jump_insn (gen_bge (label));
expand_dec (adjusted_op0, op1);
expand_inc (adjusted_op0, const1_rtx);
if (! unsignedp)
{
label = gen_label_rtx ();
- emit_cmp_insn (adjusted_op0, const0_rtx, LE, 0, compute_mode, 0, 0);
+ emit_cmp_insn (adjusted_op0, const0_rtx, LE,
+ NULL_RTX, compute_mode, 0, 0);
emit_jump_insn (gen_ble (label));
}
expand_inc (adjusted_op0, op1);
{
adjusted_op0 = expand_binop (compute_mode, add_optab,
adjusted_op0, plus_constant (op1, -1),
- 0, 0, OPTAB_LIB_WIDEN);
+ NULL_RTX, 0, OPTAB_LIB_WIDEN);
}
mod_insn_no_good = 1;
break;
if (log < 0)
{
op1 = expand_shift (RSHIFT_EXPR, compute_mode, op1,
- integer_one_node, 0, 0);
+ integer_one_node, NULL_RTX, 0);
if (! unsignedp)
{
- rtx label = gen_label_rtx ();
- emit_cmp_insn (adjusted_op0, const0_rtx, GE, 0, compute_mode, 0, 0);
- emit_jump_insn (gen_bge (label));
- expand_unop (compute_mode, neg_optab, op1, op1, 0);
- emit_label (label);
+ if (BRANCH_COST >= 2)
+ {
+ /* Negate OP1 if OP0 < 0. Do this by computing a temporary
+ that has all bits equal to the sign bit and exclusive
+ or-ing it with OP1. */
+ rtx temp = gen_reg_rtx (compute_mode);
+ temp = copy_to_suggested_reg (adjusted_op0, temp, compute_mode);
+ temp = expand_shift (RSHIFT_EXPR, compute_mode, temp,
+ build_int_2 (size - 1, 0),
+ NULL_RTX, 0);
+ op1 = expand_binop (compute_mode, xor_optab, op1, temp, op1,
+ unsignedp, OPTAB_LIB_WIDEN);
+ }
+ else
+ {
+ rtx label = gen_label_rtx ();
+ emit_cmp_insn (adjusted_op0, const0_rtx, GE, NULL_RTX,
+ compute_mode, 0, 0);
+ emit_jump_insn (gen_bge (label));
+ expand_unop (compute_mode, neg_optab, op1, op1, 0);
+ emit_label (label);
+ }
}
expand_inc (adjusted_op0, op1);
}
else
{
- op1 = gen_rtx (CONST_INT, VOIDmode, (1 << log) / 2);
+ op1 = GEN_INT (((HOST_WIDE_INT) 1 << log) / 2);
expand_inc (adjusted_op0, op1);
}
mod_insn_no_good = 1;
/* Try to produce the remainder directly */
if (log >= 0)
result = expand_binop (compute_mode, and_optab, adjusted_op0,
- gen_rtx (CONST_INT, VOIDmode,
- (1 << log) - 1),
+ GEN_INT (((HOST_WIDE_INT) 1 << log) - 1),
target, 1, OPTAB_LIB_WIDEN);
else
{
if (! expand_twoval_binop (unsignedp
? udivmod_optab : sdivmod_optab,
adjusted_op0, op1,
- 0, result, unsignedp))
+ NULL_RTX, result, unsignedp))
result = 0;
}
}
and a remainder subroutine would be ok,
don't use a divide subroutine. */
result = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
- adjusted_op0, op1, 0, unsignedp, OPTAB_WIDEN);
+ adjusted_op0, op1, NULL_RTX, unsignedp,
+ OPTAB_WIDEN);
else
{
/* Try a quotient insn, but not a library call. */
result = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
- adjusted_op0, op1, rem_flag ? 0 : target,
+ adjusted_op0, op1,
+ rem_flag ? NULL_RTX : target,
unsignedp, OPTAB_WIDEN);
if (result == 0)
{
result = gen_reg_rtx (mode);
if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
adjusted_op0, op1,
- result, 0, unsignedp))
+ result, NULL_RTX, unsignedp))
result = 0;
}
/* If still no luck, use a library call. */
if (result == 0)
result = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
- adjusted_op0, op1, rem_flag ? 0 : target,
+ adjusted_op0, op1,
+ rem_flag ? NULL_RTX : target,
unsignedp, OPTAB_LIB_WIDEN);
}
if (mode != VOIDmode)
tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
- tem = gen_rtx (CONST_INT, VOIDmode, INTVAL (op0) & INTVAL (op1));
+ tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
else
abort ();
if (mode == VOIDmode)
mode = GET_MODE (op0);
+ /* If one operand is constant, make it the second one. Only do this
+ if the other operand is not constant as well. */
+
+ if ((CONSTANT_P (op0) && ! CONSTANT_P (op1))
+ || (GET_CODE (op0) == CONST_INT && GET_CODE (op1) != CONST_INT))
+ {
+ tem = op0;
+ op0 = op1;
+ op1 = tem;
+ code = swap_condition (code);
+ }
+
/* For some comparisons with 1 and -1, we can convert this to
comparisons with zero. This will often produce more opportunities for
store-flag insns. */
if (op1 == const0_rtx && (code == LT || code == GE)
&& GET_MODE_CLASS (mode) == MODE_INT
&& (normalizep || STORE_FLAG_VALUE == 1
- || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_INT
- && STORE_FLAG_VALUE == 1 << (GET_MODE_BITSIZE (mode) - 1))))
+ || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
+ && (STORE_FLAG_VALUE
+ == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
{
- rtx subtarget = target;
+ subtarget = target;
/* If the result is to be wider than OP0, it is best to convert it
first. If it is to be narrower, it is *incorrect* to convert it
first. */
if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
{
+ op0 = protect_from_queue (op0, 0);
op0 = convert_to_mode (target_mode, op0, 0);
mode = target_mode;
}
emit_queue ();
last = get_last_insn ();
- comparison = compare_from_rtx (op0, op1, code, unsignedp, mode, 0, 0);
+ comparison
+ = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
if (GET_CODE (comparison) == CONST_INT)
return (comparison == const0_rtx ? const0_rtx
: normalizep == 1 ? const1_rtx
: normalizep == -1 ? constm1_rtx
: const_true_rtx);
+ /* If the code of COMPARISON doesn't match CODE, something is
+ wrong; we can no longer be sure that we have the operation.
+ We could handle this case, but it should not happen. */
+
+ if (GET_CODE (comparison) != code)
+ abort ();
+
/* Get a reference to the target in the proper mode for this insn. */
compare_mode = insn_operand_mode[(int) icode][0];
subtarget = target;
{
convert_move (target, subtarget,
(GET_MODE_BITSIZE (compare_mode)
- <= HOST_BITS_PER_INT)
+ <= HOST_BITS_PER_WIDE_INT)
&& 0 == (STORE_FLAG_VALUE
- & (1 << (GET_MODE_BITSIZE (compare_mode) -1))));
+ & ((HOST_WIDE_INT) 1
+ << (GET_MODE_BITSIZE (compare_mode) -1))));
op0 = target;
compare_mode = target_mode;
}
else
op0 = subtarget;
+ /* If we want to keep subexpressions around, don't reuse our
+ last target. */
+
+ if (preserve_subexpressions_p ())
+ subtarget = 0;
+
/* Now normalize to the proper value in COMPARE_MODE. Sometimes
we don't have to do anything. */
if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
/* We don't want to use STORE_FLAG_VALUE < 0 below since this
makes it hard to use a value of just the sign bit due to
ANSI integer constant typing rules. */
- else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_INT
+ else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
&& (STORE_FLAG_VALUE
- & (1 << (GET_MODE_BITSIZE (compare_mode) - 1))))
+ & ((HOST_WIDE_INT) 1
+ << (GET_MODE_BITSIZE (compare_mode) - 1))))
op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
size_int (GET_MODE_BITSIZE (compare_mode) - 1),
subtarget, normalizep == 1);
conversion now. */
if (target_mode != compare_mode)
{
- convert_move (target, op0);
+ convert_move (target, op0, 0);
return target;
}
else
comparison with zero. Don't do any of these cases if branches are
very cheap. */
- if (BRANCH_COST >= 0
+ if (BRANCH_COST > 0
&& GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
&& op1 != const0_rtx)
{
if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
normalizep = STORE_FLAG_VALUE;
- else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_INT
- && STORE_FLAG_VALUE == 1 << (GET_MODE_BITSIZE (mode) - 1))
+ else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
+ && (STORE_FLAG_VALUE
+ == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
;
else
return 0;
else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
{
mode = word_mode;
+ op0 = protect_from_queue (op0, 0);
tem = convert_to_mode (mode, op0, 1);
}