* double-int.h (double_int_setbit): Declare.

[pf3gnuchains/gcc-fork.git] / gcc / expmed.c
diff --git a/gcc/expmed.c b/gcc/expmed.c

index 841f94b..07b1dc6 100644 (file)
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -1,7 +1,7 @@
  /* Medium-level subroutines: convert bit-field store and extract
     and shifts, multiplies and divides to rtl instructions.
     Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
-   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
     Free Software Foundation, Inc.
  
  This file is part of GCC.
@@ -390,7 +390,7 @@ store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
          always get higher addresses.  */
        int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
        int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
-      
+
        byte_offset = 0;
  
        /* Paradoxical subregs need special handling on big endian machines.  */
@@ -685,6 +685,7 @@ store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
        rtx xop0 = op0;
        rtx last = get_last_insn ();
        rtx pat;
+      bool copy_back = false;
  
        /* Add OFFSET into OP0's address.  */
        if (MEM_P (xop0))
@@ -697,7 +698,24 @@ store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
            and we will need the original value of op0 if insv fails.  */
         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
        if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
-       xop0 = gen_rtx_SUBREG (op_mode, xop0, 0);
+       xop0 = gen_lowpart_SUBREG (op_mode, xop0);
+
+      /* If the destination is a paradoxical subreg such that we need a
+        truncate to the inner mode, perform the insertion on a temporary and
+        truncate the result to the original destination.  Note that we can't
+        just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
+        X) 0)) is (reg:N X).  */
+      if (GET_CODE (xop0) == SUBREG
+         && REG_P (SUBREG_REG (xop0))
+         && (!TRULY_NOOP_TRUNCATION
+             (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))),
+              GET_MODE_BITSIZE (op_mode))))
+       {
+         rtx tem = gen_reg_rtx (op_mode);
+         emit_move_insn (tem, xop0);
+         xop0 = tem;
+         copy_back = true;
+       }
  
        /* On big-endian machines, we count bits from the most significant.
          If the bit field insn does not, we must invert.  */
@@ -758,15 +776,8 @@ store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
         {
           emit_insn (pat);
  
-         /* If the mode of the insertion is wider than the mode of the
-            target register we created a paradoxical subreg for the
-            target.  Truncate the paradoxical subreg of the target to
-            itself properly.  */
-         if (!TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (op0)),
-                                     GET_MODE_BITSIZE (op_mode))
-             && (REG_P (xop0)
-                 || GET_CODE (xop0) == SUBREG))
-             convert_move (op0, xop0, true);
+         if (copy_back)
+           convert_move (op0, xop0, true);
           return true;
         }
        delete_insns_since (last);
@@ -1531,7 +1542,7 @@ extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
        /* If op0 is a register, we need it in EXT_MODE to make it
          acceptable to the format of ext(z)v.  */
        if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
-       xop0 = gen_rtx_SUBREG (ext_mode, xop0, 0);
+       xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
        if (MEM_P (xop0))
         /* Get ref to first byte containing part of the field.  */
         xop0 = adjust_address (xop0, byte_mode, xoffset);
@@ -1828,39 +1839,15 @@ extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
  static rtx
  mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
  {
-  HOST_WIDE_INT masklow, maskhigh;
-
-  if (bitsize == 0)
-    masklow = 0;
-  else if (bitpos < HOST_BITS_PER_WIDE_INT)
-    masklow = (HOST_WIDE_INT) -1 << bitpos;
-  else
-    masklow = 0;
-
-  if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
-    masklow &= ((unsigned HOST_WIDE_INT) -1
-               >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
+  double_int mask;
  
-  if (bitpos <= HOST_BITS_PER_WIDE_INT)
-    maskhigh = -1;
-  else
-    maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
-
-  if (bitsize == 0)
-    maskhigh = 0;
-  else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
-    maskhigh &= ((unsigned HOST_WIDE_INT) -1
-                >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
-  else
-    maskhigh = 0;
+  mask = double_int_mask (bitsize);
+  mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
  
    if (complement)
-    {
-      maskhigh = ~maskhigh;
-      masklow = ~masklow;
-    }
+    mask = double_int_not (mask);
  
-  return immed_double_const (masklow, maskhigh, mode);
+  return immed_double_int_const (mask, mode);
  }
  
  /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
@@ -1869,24 +1856,12 @@ mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
  static rtx
  lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
  {
-  unsigned HOST_WIDE_INT v = INTVAL (value);
-  HOST_WIDE_INT low, high;
-
-  if (bitsize < HOST_BITS_PER_WIDE_INT)
-    v &= ~((HOST_WIDE_INT) -1 << bitsize);
-
-  if (bitpos < HOST_BITS_PER_WIDE_INT)
-    {
-      low = v << bitpos;
-      high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
-    }
-  else
-    {
-      low = 0;
-      high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
-    }
+  double_int val;
+  
+  val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
+  val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
  
-  return immed_double_const (low, high, mode);
+  return immed_double_int_const (val, mode);
  }
  \f
  /* Extract a bit field that is split across two words
@@ -2354,7 +2329,7 @@ struct alg_hash_entry {
       Otherwise, the cost within which multiplication by T is
       impossible.  */
    struct mult_cost cost;
- 
+
    /* OPtimized for speed? */
    bool speed;
  };
@@ -3187,7 +3162,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
                                    target, unsignedp);
             }
         }
-        
+
        /* We used to test optimize here, on the grounds that it's better to
          produce a smaller program when -O is not used.  But this causes
          such a terrible slowdown sometimes that it seems better to always
@@ -3242,6 +3217,55 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
    gcc_assert (op0);
    return op0;
  }
+
+/* Perform a widening multiplication and return an rtx for the result.
+   MODE is the mode of the result; OP0 and OP1 are what to multiply (rtx's);
+   TARGET is a suggestion for where to store the result (an rtx).
+   THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
+   or smul_widen_optab; it also decides how OP0 is extended to MODE.
+
+   We check specially for a constant integer as OP1: zero gives const0_rtx,
+   a power of two gives a shift, and otherwise shifts and adds are used when
+   choose_mult_variant finds a sequence within mul_widen_cost.  */
+
+rtx
+expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
+                     int unsignedp, optab this_optab)
+{
+  bool speed = optimize_insn_for_speed_p ();
+
+  if (CONST_INT_P (op1)
+      && (INTVAL (op1) >= 0
+         || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT))
+    {
+      HOST_WIDE_INT coeff = INTVAL (op1);
+      int max_cost;
+      enum mult_variant variant;
+      struct algorithm algorithm;
+
+      /* Special case zero (floor_log2 (0) is -1) and powers of two.  */
+      if (coeff == 0)
+       return const0_rtx;
+      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
+       {
+         op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
+         return expand_shift (LSHIFT_EXPR, mode, op0,
+                              build_int_cst (NULL_TREE, floor_log2 (coeff)),
+                              target, unsignedp);
+       }
+
+      /* Exclude cost of op0 from max_cost to match synth_mult's costing.  */
+      max_cost = mul_widen_cost[speed][mode];
+      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
+       {
+         op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
+         return expand_mult_const (mode, op0, coeff, target,
+                                   &algorithm, variant);
+       }
+    }
+  return expand_binop (mode, this_optab, op0, op1, target,
+                      unsignedp, OPTAB_LIB_WIDEN);
+}
  \f
  /* Return the smallest n such that 2**n >= X.  */
  
@@ -3566,8 +3590,8 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
  
    cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
  
-  /* We can't optimize modes wider than BITS_PER_WORD. 
-     ??? We might be able to perform double-word arithmetic if 
+  /* We can't optimize modes wider than BITS_PER_WORD.
+     ??? We might be able to perform double-word arithmetic if
       mode == word_mode, however all the cost calculations in
       synth_mult etc. assume single-word operations.  */
    if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
@@ -4183,7 +4207,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
                 else if (d == -1)
                   quotient = expand_unop (compute_mode, neg_optab, op0,
                                           tquotient, 0);
-               else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
+               else if (HOST_BITS_PER_WIDE_INT >= size
+                        && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
                   {
                     /* This case is not handled correctly below.  */
                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
@@ -4933,7 +4958,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
           if (!remainder)
             {
               remainder = gen_reg_rtx (compute_mode);
-             if (!expand_twoval_binop_libfunc 
+             if (!expand_twoval_binop_libfunc
                   (unsignedp ? udivmod_optab : sdivmod_optab,
                    op0, op1,
                    NULL_RTX, remainder,
@@ -4976,12 +5001,12 @@ make_tree (tree type, rtx x)
                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
                      < HOST_BITS_PER_WIDE_INT)))
           hi = -1;
-      
+
         t = build_int_cst_wide (type, INTVAL (x), hi);
-       
+
         return t;
        }
-      
+
      case CONST_DOUBLE:
        if (GET_MODE (x) == VOIDmode)
         t = build_int_cst_wide (type,
@@ -5078,10 +5103,11 @@ make_tree (tree type, rtx x)
      default:
        t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
  
-      /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
-        ptr_mode.  So convert.  */
+      /* If TYPE is a POINTER_TYPE, we might need to convert X from
+        address mode to pointer mode.  */
        if (POINTER_TYPE_P (type))
-       x = convert_memory_address (TYPE_MODE (type), x);
+       x = convert_memory_address_addr_space
+             (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
  
        /* Note that we do *not* use SET_DECL_RTL here, because we do not
          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
@@ -5115,12 +5141,12 @@ expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
  
  /* Helper function for emit_store_flag.  */
  static rtx
-emit_store_flag_1 (rtx target, enum insn_code icode, enum rtx_code code,
-                  enum machine_mode mode, enum machine_mode compare_mode,
-                  int unsignedp, rtx x, rtx y, int normalizep)
+emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
+            enum machine_mode mode, enum machine_mode compare_mode,
+            int unsignedp, rtx x, rtx y, int normalizep,
+            enum machine_mode target_mode)
  {
    rtx op0, last, comparison, subtarget, pattern;
-  enum machine_mode target_mode;
    enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
  
    last = get_last_insn ();
@@ -5138,8 +5164,12 @@ emit_store_flag_1 (rtx target, enum insn_code icode, enum rtx_code code,
        return NULL_RTX;
      }
  
-  if (!target
-      || optimize
+  if (target_mode == VOIDmode)
+    target_mode = result_mode;
+  if (!target)
+    target = gen_reg_rtx (target_mode);
+
+  if (optimize
        || !(insn_data[(int) icode].operand[0].predicate (target, result_mode)))
      subtarget = gen_reg_rtx (result_mode);
    else
@@ -5150,10 +5180,6 @@ emit_store_flag_1 (rtx target, enum insn_code icode, enum rtx_code code,
      return NULL_RTX;
    emit_insn (pattern);
  
-  if (!target)
-    target = gen_reg_rtx (GET_MODE (subtarget));
-  target_mode = GET_MODE (target);
-  
    /* If we are converting to a wider mode, first convert to
       TARGET_MODE, then normalize.  This produces better combining
       opportunities on machines that have a SIGN_EXTRACT when we are
@@ -5217,34 +5243,22 @@ emit_store_flag_1 (rtx target, enum insn_code icode, enum rtx_code code,
      return op0;
  }
  
-/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
-   and storing in TARGET.  Normally return TARGET.
-   Return 0 if that cannot be done.
-
-   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
-   it is VOIDmode, they cannot both be CONST_INT.
  
-   UNSIGNEDP is for the case where we have to widen the operands
-   to perform the operation.  It says to use zero-extension.
+/* A subroutine of emit_store_flag only including "tricks" that do not
+   need a recursive call.  These are kept separate to avoid infinite
+   loops.  */
  
-   NORMALIZEP is 1 if we should convert the result to be either zero
-   or one.  Normalize is -1 if we should convert the result to be
-   either zero or -1.  If NORMALIZEP is zero, the result will be left
-   "raw" out of the scc insn.  */
-
-rtx
-emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
-                enum machine_mode mode, int unsignedp, int normalizep)
+static rtx
+emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
+                  enum machine_mode mode, int unsignedp, int normalizep,
+                  enum machine_mode target_mode)
  {
    rtx subtarget;
    enum insn_code icode;
    enum machine_mode compare_mode;
-  enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
    enum mode_class mclass;
-  enum rtx_code rcode;
    enum rtx_code scode;
-  rtx tem, trueval;
-  rtx last;
+  rtx tem;
  
    if (unsignedp)
      code = unsigned_condition (code);
@@ -5307,20 +5321,20 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
        if ((code == EQ || code == NE)
           && (op1 == const0_rtx || op1 == constm1_rtx))
         {
-         rtx op00, op01, op0both;
+         rtx op00, op01;
  
           /* Do a logical OR or AND of the two words and compare the
              result.  */
           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
-         op0both = expand_binop (word_mode,
-                                 op1 == const0_rtx ? ior_optab : and_optab,
-                                 op00, op01, NULL_RTX, unsignedp,
-                                 OPTAB_DIRECT);
-
-         if (op0both != 0)
-           return emit_store_flag (target, code, op0both, op1, word_mode,
-                                   unsignedp, normalizep);
+         tem = expand_binop (word_mode,
+                             op1 == const0_rtx ? ior_optab : and_optab,
+                             op00, op01, NULL_RTX, unsignedp,
+                             OPTAB_DIRECT);
+
+         if (tem != 0)
+           tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
+                                  unsignedp, normalizep);
         }
        else if ((code == LT || code == GE) && op1 == const0_rtx)
         {
@@ -5330,8 +5344,24 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
           op0h = simplify_gen_subreg (word_mode, op0, mode,
                                       subreg_highpart_offset (word_mode,
                                                               mode));
-         return emit_store_flag (target, code, op0h, op1, word_mode,
-                                 unsignedp, normalizep);
+         tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
+                                unsignedp, normalizep);
+       }
+      else
+       tem = NULL_RTX;
+
+      if (tem)
+       {
+         if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
+           return tem;
+         if (!target)
+           target = gen_reg_rtx (target_mode);
+
+         convert_move (target, tem,
+                       0 == ((normalizep ? normalizep : STORE_FLAG_VALUE)
+                             & ((HOST_WIDE_INT) 1
+                                << (GET_MODE_BITSIZE (word_mode) -1))));
+         return target;
         }
      }
  
@@ -5390,15 +5420,15 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
       if (icode != CODE_FOR_nothing)
         {
           do_pending_stack_adjust ();
-         tem = emit_store_flag_1 (target, icode, code, mode, compare_mode,
-                                  unsignedp, op0, op1, normalizep);
+         tem = emit_cstore (target, icode, code, mode, compare_mode,
+                            unsignedp, op0, op1, normalizep, target_mode);
           if (tem)
             return tem;
  
           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
             {
-             tem = emit_store_flag_1 (target, icode, scode, mode, compare_mode,
-                                      unsignedp, op1, op0, normalizep);
+             tem = emit_cstore (target, icode, scode, mode, compare_mode,
+                                unsignedp, op1, op0, normalizep, target_mode);
               if (tem)
                 return tem;
             }
@@ -5406,7 +5436,37 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
         }
      }
  
-  last = get_last_insn ();
+  return 0;
+}
+
+/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
+   and storing in TARGET.  Normally return TARGET.
+   Return 0 if that cannot be done.
+
+   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
+   it is VOIDmode, they cannot both be CONST_INT.
+
+   UNSIGNEDP is for the case where we have to widen the operands
+   to perform the operation.  It says to use zero-extension.
+
+   NORMALIZEP is 1 if we should convert the result to be either zero
+   or one.  NORMALIZEP is -1 if we should convert the result to be
+   either zero or -1.  If NORMALIZEP is zero, the result will be left
+   "raw" out of the scc insn.  */
+
+rtx
+emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
+                enum machine_mode mode, int unsignedp, int normalizep)
+{
+  enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
+  enum rtx_code rcode;
+  rtx subtarget;
+  rtx tem, last, trueval;
+
+  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
+                          target_mode);
+  if (tem)
+    return tem;
  
    /* If we reached here, we can't do this with a scc insn, however there
       are some comparisons that can be done in other ways.  Don't do any
@@ -5430,6 +5490,8 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
         return 0;
      }
  
+  last = get_last_insn ();
+
    /* If optimizing, use different pseudo registers for each insn, instead
       of reusing the same pseudo.  This leads to better CSE, but slows
       down the compiler, since there are more pseudos */
@@ -5450,21 +5512,27 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
         {
+          int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
+                         || (STORE_FLAG_VALUE == -1 && normalizep == 1));
+
           /* For the reverse comparison, use either an addition or a XOR.  */
-         if ((STORE_FLAG_VALUE == 1 && normalizep == -1)
-             || (STORE_FLAG_VALUE == -1 && normalizep == 1))
+          if (want_add
+             && rtx_cost (GEN_INT (normalizep), PLUS,
+                          optimize_insn_for_speed_p ()) == 0)
             {
-             tem = emit_store_flag (subtarget, rcode, op0, op1, mode, 0,
-                                    STORE_FLAG_VALUE);
+             tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
+                                      STORE_FLAG_VALUE, target_mode);
               if (tem)
                  return expand_binop (target_mode, add_optab, tem,
                                      GEN_INT (normalizep),
                                      target, 0, OPTAB_WIDEN);
             }
-         else
+          else if (!want_add
+                  && rtx_cost (trueval, XOR,
+                               optimize_insn_for_speed_p ()) == 0)
             {
-             tem = emit_store_flag (subtarget, rcode, op0, op1, mode, 0,
-                                    normalizep);
+             tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
+                                      normalizep, target_mode);
               if (tem)
                  return expand_binop (target_mode, xor_optab, tem, trueval,
                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
@@ -5476,7 +5544,7 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
        /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
        if (code == ORDERED || code == UNORDERED)
         return 0;
-       
+
        and_them = split_comparison (code, mode, &first_code, &code);
  
        /* If there are no NaNs, the first comparison should always fall through.
@@ -5484,13 +5552,15 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
        if (!HONOR_NANS (mode))
         {
            gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
-         return emit_store_flag (target, code, op0, op1, mode, 0, normalizep);
+         return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
+                                   target_mode);
         }
  
  #ifdef HAVE_conditional_move
        /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
          conditional move.  */
-      tem = emit_store_flag (subtarget, first_code, op0, op1, mode, 0, normalizep);
+      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
+                              normalizep, target_mode);
        if (tem == 0)
         return 0;
  
@@ -5546,20 +5616,26 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
             && op1 == const0_rtx))
      {
+      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
+                     || (STORE_FLAG_VALUE == -1 && normalizep == 1));
+
        /* Again, for the reverse comparison, use either an addition or a XOR.  */
-      if ((STORE_FLAG_VALUE == 1 && normalizep == -1)
-         || (STORE_FLAG_VALUE == -1 && normalizep == 1))
+      if (want_add
+         && rtx_cost (GEN_INT (normalizep), PLUS,
+                      optimize_insn_for_speed_p ()) == 0)
         {
-         tem = emit_store_flag (subtarget, rcode, op0, op1, mode, 0,
-                                STORE_FLAG_VALUE);
+         tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
+                                  STORE_FLAG_VALUE, target_mode);
           if (tem != 0)
              tem = expand_binop (target_mode, add_optab, tem,
                                 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
         }
-      else
+      else if (!want_add
+              && rtx_cost (trueval, XOR,
+                           optimize_insn_for_speed_p ()) == 0)
         {
-         tem = emit_store_flag (subtarget, rcode, op0, op1, mode, 0,
-                                normalizep);
+         tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
+                                  normalizep, target_mode);
           if (tem != 0)
              tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
@@ -5721,7 +5797,7 @@ emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
    /* If this failed, we have to do this with set/compare/jump/set code.
       For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
    trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
-  if (code == NE 
+  if (code == NE
        && GET_MODE_CLASS (mode) == MODE_INT
        && REG_P (target)
        && op0 == target
@@ -5729,7 +5805,7 @@ emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
      {
        label = gen_label_rtx ();
        do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
-                              mode, NULL_RTX, NULL_RTX, label);
+                              mode, NULL_RTX, NULL_RTX, label, -1);
        emit_move_insn (target, trueval);
        emit_label (label);
        return target;
@@ -5767,7 +5843,7 @@ emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
    emit_move_insn (target, trueval);
    label = gen_label_rtx ();
    do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
-                          NULL_RTX, label);
+                          NULL_RTX, label, -1);
  
    emit_move_insn (target, falseval);
    emit_label (label);
@@ -5785,5 +5861,5 @@ do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
  {
    int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
    do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
-                          NULL_RTX, NULL_RTX, label);
+                          NULL_RTX, NULL_RTX, label, -1);
  }