Merge dataflow branch into mainline

[pf3gnuchains/gcc-fork.git] / gcc / expmed.c
diff --git a/gcc/expmed.c b/gcc/expmed.c

index 5da9084..ad00973 100644 (file)
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -1,7 +1,8 @@
  /* Medium-level subroutines: convert bit-field store and extract
     and shifts, multiplies and divides to rtl instructions.
     Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
-   1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
+   Free Software Foundation, Inc.
  
  This file is part of GCC.
  
@@ -36,6 +37,8 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  #include "real.h"
  #include "recog.h"
  #include "langhooks.h"
+#include "df.h"
+#include "target.h"
  
  static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
@@ -103,7 +106,8 @@ static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
  static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
  static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
  static int mul_cost[NUM_MACHINE_MODES];
-static int div_cost[NUM_MACHINE_MODES];
+static int sdiv_cost[NUM_MACHINE_MODES];
+static int udiv_cost[NUM_MACHINE_MODES];
  static int mul_widen_cost[NUM_MACHINE_MODES];
  static int mul_highpart_cost[NUM_MACHINE_MODES];
  
@@ -115,11 +119,12 @@ init_expmed (void)
      struct rtx_def reg;                rtunion reg_fld[2];
      struct rtx_def plus;       rtunion plus_fld1;
      struct rtx_def neg;
-    struct rtx_def udiv;       rtunion udiv_fld1;
      struct rtx_def mult;       rtunion mult_fld1;
-    struct rtx_def div;                rtunion div_fld1;
-    struct rtx_def mod;                rtunion mod_fld1;
+    struct rtx_def sdiv;       rtunion sdiv_fld1;
+    struct rtx_def udiv;       rtunion udiv_fld1;
      struct rtx_def zext;
+    struct rtx_def sdiv_32;    rtunion sdiv_32_fld1;
+    struct rtx_def smod_32;    rtunion smod_32_fld1;
      struct rtx_def wide_mult;  rtunion wide_mult_fld1;
      struct rtx_def wide_lshr;  rtunion wide_lshr_fld1;
      struct rtx_def wide_trunc;
@@ -146,7 +151,7 @@ init_expmed (void)
  
    PUT_CODE (&all.reg, REG);
    /* Avoid using hard regs in ways which may be unsupported.  */
-  REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;
+  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
  
    PUT_CODE (&all.plus, PLUS);
    XEXP (&all.plus, 0) = &all.reg;
@@ -155,21 +160,25 @@ init_expmed (void)
    PUT_CODE (&all.neg, NEG);
    XEXP (&all.neg, 0) = &all.reg;
  
-  PUT_CODE (&all.udiv, UDIV);
-  XEXP (&all.udiv, 0) = &all.reg;
-  XEXP (&all.udiv, 1) = &all.reg;
-
    PUT_CODE (&all.mult, MULT);
    XEXP (&all.mult, 0) = &all.reg;
    XEXP (&all.mult, 1) = &all.reg;
  
-  PUT_CODE (&all.div, DIV);
-  XEXP (&all.div, 0) = &all.reg;
-  XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
+  PUT_CODE (&all.sdiv, DIV);
+  XEXP (&all.sdiv, 0) = &all.reg;
+  XEXP (&all.sdiv, 1) = &all.reg;
+
+  PUT_CODE (&all.udiv, UDIV);
+  XEXP (&all.udiv, 0) = &all.reg;
+  XEXP (&all.udiv, 1) = &all.reg;
+
+  PUT_CODE (&all.sdiv_32, DIV);
+  XEXP (&all.sdiv_32, 0) = &all.reg;
+  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
  
-  PUT_CODE (&all.mod, MOD);
-  XEXP (&all.mod, 0) = &all.reg;
-  XEXP (&all.mod, 1) = XEXP (&all.div, 1);
+  PUT_CODE (&all.smod_32, MOD);
+  XEXP (&all.smod_32, 0) = &all.reg;
+  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
  
    PUT_CODE (&all.zext, ZERO_EXTEND);
    XEXP (&all.zext, 0) = &all.reg;
@@ -205,10 +214,11 @@ init_expmed (void)
        PUT_MODE (&all.reg, mode);
        PUT_MODE (&all.plus, mode);
        PUT_MODE (&all.neg, mode);
-      PUT_MODE (&all.udiv, mode);
        PUT_MODE (&all.mult, mode);
-      PUT_MODE (&all.div, mode);
-      PUT_MODE (&all.mod, mode);
+      PUT_MODE (&all.sdiv, mode);
+      PUT_MODE (&all.udiv, mode);
+      PUT_MODE (&all.sdiv_32, mode);
+      PUT_MODE (&all.smod_32, mode);
        PUT_MODE (&all.wide_trunc, mode);
        PUT_MODE (&all.shift, mode);
        PUT_MODE (&all.shift_mult, mode);
@@ -217,11 +227,14 @@ init_expmed (void)
  
        add_cost[mode] = rtx_cost (&all.plus, SET);
        neg_cost[mode] = rtx_cost (&all.neg, SET);
-      div_cost[mode] = rtx_cost (&all.udiv, SET);
        mul_cost[mode] = rtx_cost (&all.mult, SET);
+      sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
+      udiv_cost[mode] = rtx_cost (&all.udiv, SET);
  
-      sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
-      smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
+      sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
+                              <= 2 * add_cost[mode]);
+      smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
+                              <= 4 * add_cost[mode]);
  
        wider_mode = GET_MODE_WIDER_MODE (mode);
        if (wider_mode != VOIDmode)
@@ -352,7 +365,25 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
          meaningful at a much higher level; when structures are copied
          between memory and regs, the higher-numbered regs
          always get higher addresses.  */
-      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
+      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
+      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
+      
+      byte_offset = 0;
+
+      /* Paradoxical subregs need special handling on big endian machines.  */
+      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
+       {
+         int difference = inner_mode_size - outer_mode_size;
+
+         if (WORDS_BIG_ENDIAN)
+           byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
+         if (BYTES_BIG_ENDIAN)
+           byte_offset += difference % UNITS_PER_WORD;
+       }
+      else
+       byte_offset = SUBREG_BYTE (op0);
+
+      bitnum += byte_offset * BITS_PER_UNIT;
        op0 = SUBREG_REG (op0);
      }
  
@@ -430,14 +461,11 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
              || (offset * BITS_PER_UNIT % bitsize == 0
                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
      {
-      if (GET_MODE (op0) != fieldmode)
-       {
-         if (MEM_P (op0))
-           op0 = adjust_address (op0, fieldmode, offset);
-         else
-           op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
-                                      byte_offset);
-       }
+      if (MEM_P (op0))
+       op0 = adjust_address (op0, fieldmode, offset);
+      else if (GET_MODE (op0) != fieldmode)
+       op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
+                                  byte_offset);
        emit_move_insn (op0, value);
        return value;
      }
@@ -607,11 +635,12 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
  
    if (HAVE_insv
        && GET_MODE (value) != BLKmode
-      && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
-      /* Ensure insv's size is wide enough for this field.  */
-      && (GET_MODE_BITSIZE (op_mode) >= bitsize)
+      && bitsize > 0
+      && GET_MODE_BITSIZE (op_mode) >= bitsize
        && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
-           && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
+           && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
+      && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
+                                                       VOIDmode))
      {
        int xbitpos = bitpos;
        rtx value1;
@@ -646,6 +675,7 @@ store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
             bestmode = GET_MODE (op0);
  
           if (bestmode == VOIDmode
+             || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode)
               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
             goto insv_loses;
@@ -765,7 +795,7 @@ store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
  {
    enum machine_mode mode;
    unsigned int total_bits = BITS_PER_WORD;
-  rtx subtarget, temp;
+  rtx temp;
    int all_zero = 0;
    int all_one = 0;
  
@@ -891,24 +921,28 @@ store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
  
    /* Now clear the chosen bits in OP0,
       except that if VALUE is -1 we need not bother.  */
+  /* We keep the intermediates in registers to allow CSE to combine
+     consecutive bitfield assignments.  */
  
-  subtarget = op0;
+  temp = force_reg (mode, op0);
  
    if (! all_one)
      {
-      temp = expand_binop (mode, and_optab, op0,
+      temp = expand_binop (mode, and_optab, temp,
                            mask_rtx (mode, bitpos, bitsize, 1),
-                          subtarget, 1, OPTAB_LIB_WIDEN);
-      subtarget = temp;
+                          NULL_RTX, 1, OPTAB_LIB_WIDEN);
+      temp = force_reg (mode, temp);
      }
-  else
-    temp = op0;
  
    /* Now logical-or VALUE into OP0, unless it is zero.  */
  
    if (! all_zero)
-    temp = expand_binop (mode, ior_optab, temp, value,
-                        subtarget, 1, OPTAB_LIB_WIDEN);
+    {
+      temp = expand_binop (mode, ior_optab, temp, value,
+                          NULL_RTX, 1, OPTAB_LIB_WIDEN);
+      temp = force_reg (mode, temp);
+    }
+
    if (op0 != temp)
      emit_move_insn (op0, temp);
  }
@@ -1094,6 +1128,28 @@ extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
        return op0;
      }
  
+  /* See if we can get a better vector mode before extracting.  */
+  if (VECTOR_MODE_P (GET_MODE (op0))
+      && !MEM_P (op0)
+      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
+    {
+      enum machine_mode new_mode;
+      int nunits = GET_MODE_NUNITS (GET_MODE (op0));
+
+      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
+       new_mode = MIN_MODE_VECTOR_FLOAT;
+      else
+       new_mode = MIN_MODE_VECTOR_INT;
+
+      for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
+       if (GET_MODE_NUNITS (new_mode) == nunits
+           && GET_MODE_INNER (new_mode) == tmode
+           && targetm.vector_mode_supported_p (new_mode))
+         break;
+      if (new_mode != VOIDmode)
+       op0 = gen_lowpart (new_mode, op0);
+    }
+
    /* Use vec_extract patterns for extracting parts of vectors whenever
       available.  */
    if (VECTOR_MODE_P (GET_MODE (op0))
@@ -1144,6 +1200,8 @@ extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
         {
           emit_insn (seq);
           emit_insn (pat);
+         if (mode0 != mode)
+           return gen_lowpart (tmode, dest);
           return dest;
         }
      }
@@ -1356,7 +1414,8 @@ extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
    if (unsignedp)
      {
        if (HAVE_extzv
-         && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
+         && bitsize > 0
+         && GET_MODE_BITSIZE (extzv_mode) >= bitsize
           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
         {
@@ -1409,6 +1468,11 @@ extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   xbitpos = bitnum % unit;
                   xop0 = adjust_address (xop0, bestmode, xoffset);
  
+                 /* Make sure register is big enough for the whole field. */
+                 if (xoffset * BITS_PER_UNIT + unit 
+                     < offset * BITS_PER_UNIT + bitsize)
+                   goto extzv_loses;
+
                   /* Fetch it to a register in that size.  */
                   xop0 = force_reg (bestmode, xop0);
  
@@ -1488,7 +1552,8 @@ extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
    else
      {
        if (HAVE_extv
-         && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
+         && bitsize > 0
+         && GET_MODE_BITSIZE (extv_mode) >= bitsize
           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
         {
@@ -1537,6 +1602,11 @@ extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   xbitpos = bitnum % unit;
                   xop0 = adjust_address (xop0, bestmode, xoffset);
  
+                 /* Make sure register is big enough for the whole field. */
+                 if (xoffset * BITS_PER_UNIT + unit 
+                     < offset * BITS_PER_UNIT + bitsize)
+                   goto extv_loses;
+
                   /* Fetch it to a register in that size.  */
                   xop0 = force_reg (bestmode, xop0);
  
@@ -2150,7 +2220,7 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
       and shifted in the other direction; but that does not work
       on all machines.  */
  
-  op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
+  op1 = expand_normal (amount);
  
    if (SHIFT_COUNT_TRUNCATED)
      {
@@ -2173,7 +2243,9 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
        && GET_CODE (op1) == CONST_INT
        && INTVAL (op1) > 0
        && INTVAL (op1) < GET_MODE_BITSIZE (mode)
-      && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
+      && INTVAL (op1) < MAX_BITS_PER_WORD
+      && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode]
+      && shift_cost[mode][INTVAL (op1)] != MAX_COST)
      {
        int i;
        for (i = 0; i < INTVAL (op1); i++)
@@ -2216,13 +2288,17 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
                  code below.  */
  
               rtx subtarget = target == shifted ? 0 : target;
+             tree new_amount, other_amount;
               rtx temp1;
               tree type = TREE_TYPE (amount);
-             tree new_amount = make_tree (type, op1);
-             tree other_amount
+             if (GET_MODE (op1) != TYPE_MODE (type)
+                 && GET_MODE (op1) != VOIDmode)
+               op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
+             new_amount = make_tree (type, op1);
+             other_amount
                 = fold_build2 (MINUS_EXPR, type,
                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
-                              amount);
+                              new_amount);
  
               shifted = force_reg (mode, shifted);
  
@@ -2237,19 +2313,6 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
           temp = expand_binop (mode,
                                left ? rotl_optab : rotr_optab,
                                shifted, op1, target, unsignedp, methods);
-
-         /* If we don't have the rotate, but we are rotating by a constant
-            that is in range, try a rotate in the opposite direction.  */
-
-         if (temp == 0 && GET_CODE (op1) == CONST_INT
-             && INTVAL (op1) > 0
-             && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
-           temp = expand_binop (mode,
-                                left ? rotr_optab : rotl_optab,
-                                shifted,
-                                GEN_INT (GET_MODE_BITSIZE (mode)
-                                         - INTVAL (op1)),
-                                target, unsignedp, methods);
         }
        else if (unsignedp)
         temp = expand_binop (mode,
@@ -2364,7 +2427,7 @@ struct algorithm
  /* The entry for our multiplication cache/hash table.  */
  struct alg_hash_entry {
    /* The number we are multiplying by.  */
-  unsigned int t;
+  unsigned HOST_WIDE_INT t;
  
    /* The mode in which we are multiplying something by T.  */
    enum machine_mode mode;
@@ -2379,7 +2442,11 @@ struct alg_hash_entry {
  };
  
  /* The number of cache/hash entries.  */
+#if HOST_BITS_PER_WIDE_INT == 64
+#define NUM_ALG_HASH_ENTRIES 1031
+#else
  #define NUM_ALG_HASH_ENTRIES 307
+#endif
  
  /* Each entry of ALG_HASH caches alg_code for some integer.  This is
     actually a hash table.  If we have a collision, that the older
@@ -2839,6 +2906,17 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
    struct mult_cost limit;
    int op_cost;
  
+  /* Fail quickly for impossible bounds.  */
+  if (mult_cost < 0)
+    return false;
+
+  /* Ensure that mult_cost provides a reasonable upper bound.
+     Any constant multiplication can be performed with less
+     than 2 * bits additions.  */
+  op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
+  if (mult_cost > op_cost)
+    mult_cost = op_cost;
+
    *variant = basic_variant;
    limit.cost = mult_cost;
    limit.latency = mult_cost;
@@ -2904,10 +2982,9 @@ expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
    int opno;
    enum machine_mode nmode;
  
-  /* Avoid referencing memory over and over.
-     For speed, but also for correctness when mem is volatile.  */
-  if (MEM_P (op0))
-    op0 = force_reg (mode, op0);
+  /* Avoid referencing memory over and over and invalid sharing
+     on SUBREGs.  */
+  op0 = force_reg (mode, op0);
  
    /* ACCUM starts out either as OP0 or as a zero, depending on
       the first operation.  */
@@ -3018,7 +3095,8 @@ expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
  
        insn = get_last_insn ();
        set_unique_reg_note (insn, REG_EQUAL,
-                          gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
+                          gen_rtx_MULT (nmode, tem,
+                                        GEN_INT (val_so_far)));
      }
  
    if (variant == negate_variant)
@@ -3159,7 +3237,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
  
    /* Expand x*2.0 as x+x.  */
    if (GET_CODE (op1) == CONST_DOUBLE
-      && GET_MODE_CLASS (mode) == MODE_FLOAT)
+      && SCALAR_FLOAT_MODE_P (mode))
      {
        REAL_VALUE_TYPE d;
        REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
@@ -3365,6 +3443,8 @@ extract_high_half (enum machine_mode mode, rtx op)
    if (mode == word_mode)
      return gen_highpart (mode, op);
  
+  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
+
    wider_mode = GET_MODE_WIDER_MODE (mode);
    op = expand_shift (RSHIFT_EXPR, wider_mode, op,
                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
@@ -3384,6 +3464,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
    rtx tem;
    int size;
  
+  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
+
    wider_mode = GET_MODE_WIDER_MODE (mode);
    size = GET_MODE_BITSIZE (mode);
  
@@ -3494,6 +3576,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
    struct algorithm alg;
    rtx tem;
  
+  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
    /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
    gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
  
@@ -3903,11 +3986,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
    /* Only deduct something for a REM if the last divide done was
       for a different constant.   Then set the constant of the last
       divide.  */
-  max_cost = div_cost[compute_mode]
-    - (rem_flag && ! (last_div_const != 0 && op1_is_constant
-                     && INTVAL (op1) == last_div_const)
-       ? mul_cost[compute_mode] + add_cost[compute_mode]
-       : 0);
+  max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
+  if (rem_flag && ! (last_div_const != 0 && op1_is_constant
+                    && INTVAL (op1) == last_div_const))
+    max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
  
    last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
  
@@ -4093,7 +4175,14 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
                 int lgup, post_shift;
                 rtx mlr;
                 HOST_WIDE_INT d = INTVAL (op1);
-               unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
+               unsigned HOST_WIDE_INT abs_d;
+
+               /* Since d might be INT_MIN, we have to cast to
+                  unsigned HOST_WIDE_INT before negating to avoid
+                  undefined signed overflow.  */
+               abs_d = (d >= 0
+                        ? (unsigned HOST_WIDE_INT) d
+                        : - (unsigned HOST_WIDE_INT) d);
  
                 /* n rem d = n rem -d */
                 if (rem_flag && d < 0)
@@ -4920,17 +5009,17 @@ make_tree (tree type, rtx x)
  
      case CONST_VECTOR:
        {
-       int i, units;
-       rtx elt;
+       int units = CONST_VECTOR_NUNITS (x);
+       tree itype = TREE_TYPE (type);
         tree t = NULL_TREE;
+       int i;
  
-       units = CONST_VECTOR_NUNITS (x);
  
         /* Build a tree with vector elements.  */
         for (i = units - 1; i >= 0; --i)
           {
-           elt = CONST_VECTOR_ELT (x, i);
-           t = tree_cons (NULL_TREE, make_tree (type, elt), t);
+           rtx elt = CONST_VECTOR_ELT (x, i);
+           t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
           }
  
         return build_vector (type, t);
@@ -4956,20 +5045,20 @@ make_tree (tree type, rtx x)
                           make_tree (type, XEXP (x, 1)));
  
      case LSHIFTRT:
-      t = lang_hooks.types.unsigned_type (type);
+      t = unsigned_type_for (type);
        return fold_convert (type, build2 (RSHIFT_EXPR, t,
                                          make_tree (t, XEXP (x, 0)),
                                          make_tree (type, XEXP (x, 1))));
  
      case ASHIFTRT:
-      t = lang_hooks.types.signed_type (type);
+      t = signed_type_for (type);
        return fold_convert (type, build2 (RSHIFT_EXPR, t,
                                          make_tree (t, XEXP (x, 0)),
                                          make_tree (type, XEXP (x, 1))));
  
      case DIV:
        if (TREE_CODE (type) != REAL_TYPE)
-       t = lang_hooks.types.signed_type (type);
+       t = signed_type_for (type);
        else
         t = type;
  
@@ -4977,7 +5066,7 @@ make_tree (tree type, rtx x)
                                          make_tree (t, XEXP (x, 0)),
                                          make_tree (t, XEXP (x, 1))));
      case UDIV:
-      t = lang_hooks.types.unsigned_type (type);
+      t = unsigned_type_for (type);
        return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
                                          make_tree (t, XEXP (x, 0)),
                                          make_tree (t, XEXP (x, 1))));
@@ -4988,6 +5077,15 @@ make_tree (tree type, rtx x)
                                           GET_CODE (x) == ZERO_EXTEND);
        return fold_convert (type, make_tree (t, XEXP (x, 0)));
  
+    case CONST:
+      return make_tree (type, XEXP (x, 0));
+
+    case SYMBOL_REF:
+      t = SYMBOL_REF_DECL (x);
+      if (t)
+       return fold_convert (type, build_fold_addr_expr (t));
+      /* else fall through.  */
+
      default:
        t = build_decl (VAR_DECL, NULL_TREE, type);
  
@@ -5003,69 +5101,6 @@ make_tree (tree type, rtx x)
        return t;
      }
  }
-
-/* Check whether the multiplication X * MULT + ADD overflows.
-   X, MULT and ADD must be CONST_*.
-   MODE is the machine mode for the computation.
-   X and MULT must have mode MODE.  ADD may have a different mode.
-   So can X (defaults to same as MODE).
-   UNSIGNEDP is nonzero to do unsigned multiplication.  */
-
-bool
-const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
-                          enum machine_mode mode, int unsignedp)
-{
-  tree type, mult_type, add_type, result;
-
-  type = lang_hooks.types.type_for_mode (mode, unsignedp);
-
-  /* In order to get a proper overflow indication from an unsigned
-     type, we have to pretend that it's a sizetype.  */
-  mult_type = type;
-  if (unsignedp)
-    {
-      /* FIXME:It would be nice if we could step directly from this
-        type to its sizetype equivalent.  */
-      mult_type = build_distinct_type_copy (type);
-      TYPE_IS_SIZETYPE (mult_type) = 1;
-    }
-
-  add_type = (GET_MODE (add) == VOIDmode ? mult_type
-             : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
-
-  result = fold_build2 (PLUS_EXPR, mult_type,
-                       fold_build2 (MULT_EXPR, mult_type,
-                                    make_tree (mult_type, x),
-                                    make_tree (mult_type, mult)),
-                       make_tree (add_type, add));
-
-  return TREE_CONSTANT_OVERFLOW (result);
-}
-
-/* Return an rtx representing the value of X * MULT + ADD.
-   TARGET is a suggestion for where to store the result (an rtx).
-   MODE is the machine mode for the computation.
-   X and MULT must have mode MODE.  ADD may have a different mode.
-   So can X (defaults to same as MODE).
-   UNSIGNEDP is nonzero to do unsigned multiplication.
-   This may emit insns.  */
-
-rtx
-expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
-                int unsignedp)
-{
-  tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
-  tree add_type = (GET_MODE (add) == VOIDmode
-                  ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
-                                                          unsignedp));
-  tree result = fold_build2 (PLUS_EXPR, type,
-                            fold_build2 (MULT_EXPR, type,
-                                         make_tree (type, x),
-                                         make_tree (type, mult)),
-                            make_tree (add_type, add));
-
-  return expand_expr (result, target, VOIDmode, 0);
-}
  \f
  /* Compute the logical-and of OP0 and OP1, storing it in TARGET
     and returning TARGET.
@@ -5089,6 +5124,77 @@ expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
    return target;
  }
  \f
+/* Helper function for emit_store_flag.
+
+   Take the raw flag value in SUBTARGET, which is handled here as a
+   value of mode MODE, normalize it as requested by NORMALIZEP, and
+   deliver the result in the mode of TARGET.
+
+   NORMALIZEP of zero, or equal to STORE_FLAG_VALUE, means the value
+   is used as is.  Otherwise the value is negated, shifted right by
+   the mode's bit size minus one, or masked with 1 (and then negated
+   when NORMALIZEP is -1), depending on STORE_FLAG_VALUE.
+
+   Return TARGET if a final mode conversion into it was emitted;
+   otherwise return the rtx holding the normalized value, which is
+   not necessarily TARGET.  */
+static rtx
+emit_store_flag_1 (rtx target, rtx subtarget, enum machine_mode mode,
+                  int normalizep)
+{
+  rtx op0;
+  enum machine_mode target_mode = GET_MODE (target);
+  
+  /* If we are converting to a wider mode, first convert to
+     TARGET_MODE, then normalize.  This produces better combining
+     opportunities on machines that have a SIGN_EXTRACT when we are
+     testing a single bit.  This mostly benefits the 68k.
+
+     If STORE_FLAG_VALUE does not have the sign bit set when
+     interpreted in MODE, we can do this conversion as unsigned, which
+     is usually more efficient.
+
+     After the conversion MODE is updated to TARGET_MODE, so all of
+     the normalization below happens in the wider mode.  */
+  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
+    {
+      convert_move (target, subtarget,
+                   (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
+                   && 0 == (STORE_FLAG_VALUE
+                            & ((HOST_WIDE_INT) 1
+                               << (GET_MODE_BITSIZE (mode) -1))));
+      op0 = target;
+      mode = target_mode;
+    }
+  else
+    op0 = subtarget;
+
+  /* If we want to keep subexpressions around, don't reuse our last
+     target.  When optimizing, SUBTARGET is cleared so the expanders
+     below are given no suggested destination.  */
+  if (optimize)
+    subtarget = 0;
+
+  /* Now normalize to the proper value in MODE.  Sometimes we don't
+     have to do anything.  */
+  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
+    ;
+  /* STORE_FLAG_VALUE might be the most negative number, so write
+     the comparison this way to avoid a compiler-time warning.  */
+  else if (- normalizep == STORE_FLAG_VALUE)
+    op0 = expand_unop (mode, neg_optab, op0, subtarget, 0);
+
+  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
+     it hard to use a value of just the sign bit due to ANSI integer
+     constant typing rules.
+
+     When the sign bit of MODE is set in STORE_FLAG_VALUE, shift it
+     down to the low bit.  The final argument to expand_shift is
+     nonzero only for NORMALIZEP == 1; presumably that selects a
+     logical rather than an arithmetic shift -- confirm against
+     expand_shift.  */
+  else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
+          && (STORE_FLAG_VALUE
+              & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))
+    op0 = expand_shift (RSHIFT_EXPR, mode, op0,
+                       size_int (GET_MODE_BITSIZE (mode) - 1), subtarget,
+                       normalizep == 1);
+  else
+    {
+      gcc_assert (STORE_FLAG_VALUE & 1);
+
+      op0 = expand_and (mode, op0, const1_rtx, subtarget);
+      if (normalizep == -1)
+       op0 = expand_unop (mode, neg_optab, op0, op0, 0);
+    }
+
+  /* If we were converting to a smaller mode, do the conversion now.
+     In the widening case MODE was already set to TARGET_MODE above,
+     so a mismatch here means no conversion has been done yet.  */
+  if (target_mode != mode)
+    {
+      convert_move (target, op0, 0);
+      return target;
+    }
+  else
+    return op0;
+}
+
  /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
     and storing in TARGET.  Normally return TARGET.
     Return 0 if that cannot be done.
@@ -5178,12 +5284,14 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
         {
           rtx op00, op01, op0both;
  
-         /* Do a logical OR or AND of the two words and compare the result.  */
+         /* Do a logical OR or AND of the two words and compare the
+            result.  */
           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
           op0both = expand_binop (word_mode,
                                   op1 == const0_rtx ? ior_optab : and_optab,
-                                 op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
+                                 op00, op01, NULL_RTX, unsignedp,
+                                 OPTAB_DIRECT);
  
           if (op0both != 0)
             return emit_store_flag (target, code, op0both, op1, word_mode,
@@ -5195,15 +5303,13 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
  
           /* If testing the sign bit, can just test on high word.  */
           op0h = simplify_gen_subreg (word_mode, op0, mode,
-                                     subreg_highpart_offset (word_mode, mode));
+                                     subreg_highpart_offset (word_mode,
+                                                             mode));
           return emit_store_flag (target, code, op0h, op1, word_mode,
                                   unsignedp, normalizep);
         }
      }
  
-  /* From now on, we won't change CODE, so set ICODE now.  */
-  icode = setcc_gen_code[(int) code];
-
    /* If this is A < 0 or A >= 0, we can do this by taking the ones
       complement of A (for GE) and shifting the sign bit to the low bit.  */
    if (op1 == const0_rtx && (code == LT || code == GE)
@@ -5211,7 +5317,8 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
        && (normalizep || STORE_FLAG_VALUE == 1
           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
-                 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
+                 == ((unsigned HOST_WIDE_INT) 1
+                     << (GET_MODE_BITSIZE (mode) - 1))))))
      {
        subtarget = target;
  
@@ -5246,6 +5353,8 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
        return op0;
      }
  
+  icode = setcc_gen_code[(int) code];
+
    if (icode != CODE_FOR_nothing)
      {
        insn_operand_predicate_fn pred;
@@ -5303,72 +5412,65 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
        if (pattern)
         {
           emit_insn (pattern);
+         return emit_store_flag_1 (target, subtarget, compare_mode,
+                                   normalizep);
+       }
+    }
+  else
+    {
+      /* We don't have an scc insn, so try a cstore insn.  */
+
+      for (compare_mode = mode; compare_mode != VOIDmode;
+          compare_mode = GET_MODE_WIDER_MODE (compare_mode))
+       {
+         icode = cstore_optab->handlers[(int) compare_mode].insn_code;
+         if (icode != CODE_FOR_nothing)
+           break;
+       }
+
+      if (icode != CODE_FOR_nothing)
+       {
+         enum machine_mode result_mode
+           = insn_data[(int) icode].operand[0].mode;
+         rtx cstore_op0 = op0;
+         rtx cstore_op1 = op1;
  
-         /* If we are converting to a wider mode, first convert to
-            TARGET_MODE, then normalize.  This produces better combining
-            opportunities on machines that have a SIGN_EXTRACT when we are
-            testing a single bit.  This mostly benefits the 68k.
+         do_pending_stack_adjust ();
+         last = get_last_insn ();
  
-            If STORE_FLAG_VALUE does not have the sign bit set when
-            interpreted in COMPARE_MODE, we can do this conversion as
-            unsigned, which is usually more efficient.  */
-         if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
+         if (compare_mode != mode)
             {
-             convert_move (target, subtarget,
-                           (GET_MODE_BITSIZE (compare_mode)
-                            <= HOST_BITS_PER_WIDE_INT)
-                           && 0 == (STORE_FLAG_VALUE
-                                    & ((HOST_WIDE_INT) 1
-                                       << (GET_MODE_BITSIZE (compare_mode) -1))));
-             op0 = target;
-             compare_mode = target_mode;
+             cstore_op0 = convert_modes (compare_mode, mode, cstore_op0,
+                                         unsignedp);
+             cstore_op1 = convert_modes (compare_mode, mode, cstore_op1,
+                                         unsignedp);
             }
-         else
-           op0 = subtarget;
+         
+         if (!insn_data[(int) icode].operand[2].predicate (cstore_op0,
+                                                           compare_mode))
+           cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0);
  
-         /* If we want to keep subexpressions around, don't reuse our
-            last target.  */
+         if (!insn_data[(int) icode].operand[3].predicate (cstore_op1,
+                                                           compare_mode))
+           cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1);
  
-         if (optimize)
-           subtarget = 0;
+         comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0,
+                                      cstore_op1);
+         subtarget = target;
  
-         /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
-            we don't have to do anything.  */
-         if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
-           ;
-         /* STORE_FLAG_VALUE might be the most negative number, so write
-            the comparison this way to avoid a compiler-time warning.  */
-         else if (- normalizep == STORE_FLAG_VALUE)
-           op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
-
-         /* We don't want to use STORE_FLAG_VALUE < 0 below since this
-            makes it hard to use a value of just the sign bit due to
-            ANSI integer constant typing rules.  */
-         else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
-                  && (STORE_FLAG_VALUE
-                      & ((HOST_WIDE_INT) 1
-                         << (GET_MODE_BITSIZE (compare_mode) - 1))))
-           op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
-                               size_int (GET_MODE_BITSIZE (compare_mode) - 1),
-                               subtarget, normalizep == 1);
-         else
-           {
-             gcc_assert (STORE_FLAG_VALUE & 1);
-             
-             op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
-             if (normalizep == -1)
-               op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
-           }
+         if (optimize || !(insn_data[(int) icode].operand[0].predicate
+                           (subtarget, result_mode)))
+           subtarget = gen_reg_rtx (result_mode);
  
-         /* If we were converting to a smaller mode, do the
-            conversion now.  */
-         if (target_mode != compare_mode)
+         pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0,
+                                    cstore_op1);
+
+         if (pattern)
             {
-             convert_move (target, op0, 0);
-             return target;
+             emit_insn (pattern);
+             return emit_store_flag_1 (target, subtarget, result_mode,
+                                       normalizep);
             }
-         else
-           return op0;
         }
      }
  
@@ -5581,66 +5683,14 @@ emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
  }
  \f
  /* Perform possibly multi-word comparison and conditional jump to LABEL
-   if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
-
-   The algorithm is based on the code in expr.c:do_jump.
-
-   Note that this does not perform a general comparison.  Only
-   variants generated within expmed.c are correctly handled, others
-   could be handled if needed.  */
+   if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is a
+   thin wrapper around do_compare_rtx_and_jump.  */
  
  static void
  do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
                  rtx label)
  {
-  /* If this mode is an integer too wide to compare properly,
-     compare word by word.  Rely on cse to optimize constant cases.  */
-
-  if (GET_MODE_CLASS (mode) == MODE_INT
-      && ! can_compare_p (op, mode, ccp_jump))
-    {
-      rtx label2 = gen_label_rtx ();
-
-      switch (op)
-       {
-       case LTU:
-         do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
-         break;
-
-       case LEU:
-         do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
-         break;
-
-       case LT:
-         do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
-         break;
-
-       case GT:
-         do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
-         break;
-
-       case GE:
-         do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
-         break;
-
-         /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
-            that's the only equality operations we do */
-       case EQ:
-         gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
-         do_jump_by_parts_equality_rtx (arg1, label2, label);
-         break;
-
-       case NE:
-         gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
-         do_jump_by_parts_equality_rtx (arg1, label, label2);
-         break;
-
-       default:
-         gcc_unreachable ();
-       }
-
-      emit_label (label2);
-    }
-  else
-    emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
+  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
+  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
+                          NULL_RTX, NULL_RTX, label);
  }