X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;ds=sidebyside;f=gcc%2Fexpmed.c;h=ae76695a133b7dc15e60bfccd5deb7bf014ae397;hb=a79b863a00888e242f9ceda4f1f70a8772ecf727;hp=d2edd813c964ea35f15119b4fd593ddcbc873185;hpb=2d232d05278c1a5e45612ed694993cf7e9e5f963;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/expmed.c b/gcc/expmed.c index d2edd813c96..ae76695a133 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -1,7 +1,7 @@ /* Medium-level subroutines: convert bit-field store and extract and shifts, multiplies and divides to rtl instructions. Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, - 1999, 2000, 2001, 2002 Free Software Foundation, Inc. + 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. This file is part of GCC. @@ -23,6 +23,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "config.h" #include "system.h" +#include "coretypes.h" +#include "tm.h" #include "toplev.h" #include "rtl.h" #include "tree.h" @@ -33,32 +35,30 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "optabs.h" #include "real.h" #include "recog.h" - -static void store_fixed_bit_field PARAMS ((rtx, unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT, rtx)); -static void store_split_bit_field PARAMS ((rtx, unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT, rtx)); -static rtx extract_fixed_bit_field PARAMS ((enum machine_mode, rtx, - unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT, - rtx, int)); -static rtx mask_rtx PARAMS ((enum machine_mode, int, - int, int)); -static rtx lshift_value PARAMS ((enum machine_mode, rtx, - int, int)); -static rtx extract_split_bit_field PARAMS ((rtx, unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT, int)); -static void do_cmp_and_jump PARAMS ((rtx, rtx, enum rtx_code, - enum machine_mode, rtx)); - -/* Non-zero means divides or modulus operations are relatively cheap for +#include "langhooks.h" + +static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, rtx); +static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, rtx); +static rtx extract_fixed_bit_field (enum machine_mode, rtx, + unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, rtx, int); +static rtx mask_rtx (enum machine_mode, int, int, int); +static rtx lshift_value (enum machine_mode, rtx, int, int); +static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, int); +static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx); + +/* Nonzero means divides or modulus operations are relatively cheap for powers of two, so don't use branches; emit the operation instead. Usually, this will mean that the MD file will emit non-branch sequences. */ -static int sdiv_pow2_cheap, smod_pow2_cheap; +static int sdiv_pow2_cheap[NUM_MACHINE_MODES]; +static int smod_pow2_cheap[NUM_MACHINE_MODES]; #ifndef SLOW_UNALIGNED_ACCESS #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT @@ -91,100 +91,66 @@ static int sdiv_pow2_cheap, smod_pow2_cheap; /* Cost of various pieces of RTL. Note that some of these are indexed by shift count and some by mode. 
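
   Illustration (editorial sketch, not part of the patch): the hunk above
   turns the single word_mode cost scalars into tables indexed by machine
   mode, and the shift tables by shift count as well. A minimal model of
   the lookup this enables, with entirely made-up cost numbers and a
   hypothetical two-mode enum:

     #include <assert.h>

     enum mini_mode { MM_SI, MM_DI, MM_NUM };

     // Made-up costs: shifts cheap in both modes, multiplies cheaper in
     // SImode than DImode, mirroring the shape of the real tables.
     static int mini_shift_cost[MM_NUM][8] = {
       { 0, 1, 1, 1, 1, 1, 1, 1 },   // SI
       { 0, 2, 2, 2, 2, 2, 2, 2 },   // DI
     };
     static int mini_mul_cost[MM_NUM] = { 4, 10 };

     // Per-mode answer to "is x << m cheaper than x * (1 << m)?".
     static int use_shift_p (enum mini_mode m, int count)
     {
       return mini_shift_cost[m][count] < mini_mul_cost[m];
     }

     int main (void)
     {
       assert (use_shift_p (MM_SI, 3) && use_shift_p (MM_DI, 3));
       return 0;
     }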
*/ -static int add_cost, negate_cost, zero_cost; -static int shift_cost[MAX_BITS_PER_WORD]; -static int shiftadd_cost[MAX_BITS_PER_WORD]; -static int shiftsub_cost[MAX_BITS_PER_WORD]; +static int zero_cost; +static int add_cost[NUM_MACHINE_MODES]; +static int neg_cost[NUM_MACHINE_MODES]; +static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; +static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; +static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; static int mul_cost[NUM_MACHINE_MODES]; static int div_cost[NUM_MACHINE_MODES]; static int mul_widen_cost[NUM_MACHINE_MODES]; static int mul_highpart_cost[NUM_MACHINE_MODES]; void -init_expmed () +init_expmed (void) { - /* This is "some random pseudo register" for purposes of calling recog - to see what insns exist. */ - rtx reg = gen_rtx_REG (word_mode, 10000); - rtx shift_insn, shiftadd_insn, shiftsub_insn; + rtx reg, shift_insn, shiftadd_insn, shiftsub_insn; + rtx shift_pat, shiftadd_pat, shiftsub_pat; + rtx pow2[MAX_BITS_PER_WORD]; + rtx cint[MAX_BITS_PER_WORD]; int dummy; - int m; + int m, n; enum machine_mode mode, wider_mode; start_sequence (); - reg = gen_rtx_REG (word_mode, 10000); - zero_cost = rtx_cost (const0_rtx, 0); - add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET); - - shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg, - gen_rtx_ASHIFT (word_mode, reg, - const0_rtx))); - - shiftadd_insn - = emit_insn (gen_rtx_SET (VOIDmode, reg, - gen_rtx_PLUS (word_mode, - gen_rtx_MULT (word_mode, - reg, const0_rtx), - reg))); - - shiftsub_insn - = emit_insn (gen_rtx_SET (VOIDmode, reg, - gen_rtx_MINUS (word_mode, - gen_rtx_MULT (word_mode, - reg, const0_rtx), - reg))); init_recog (); - shift_cost[0] = 0; - shiftadd_cost[0] = shiftsub_cost[0] = add_cost; - for (m = 1; m < MAX_BITS_PER_WORD; m++) { - shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000; - - XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m); - if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0) - shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET); - - XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) - = GEN_INT ((HOST_WIDE_INT) 1 << m); - if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0) - shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET); - - XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) - = GEN_INT ((HOST_WIDE_INT) 1 << m); - if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0) - shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET); + pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m); + cint[m] = GEN_INT (m); } - negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET); - - sdiv_pow2_cheap - = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET) - <= 2 * add_cost); - smod_pow2_cheap - = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET) - <= 2 * add_cost); - for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) { reg = gen_rtx_REG (mode, 10000); - div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET); - mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET); + add_cost[mode] = rtx_cost (gen_rtx_PLUS (mode, reg, reg), SET); + neg_cost[mode] = rtx_cost (gen_rtx_NEG (mode, reg), SET); + div_cost[mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET); + mul_cost[mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET); + + sdiv_pow2_cheap[mode] + = (rtx_cost (gen_rtx_DIV (mode, reg, GEN_INT (32)), SET) + <= 2 * add_cost[mode]); + smod_pow2_cheap[mode] + = 
(rtx_cost (gen_rtx_MOD (mode, reg, GEN_INT (32)), SET) + <= 2 * add_cost[mode]); + wider_mode = GET_MODE_WIDER_MODE (mode); if (wider_mode != VOIDmode) { - mul_widen_cost[(int) wider_mode] + mul_widen_cost[wider_mode] = rtx_cost (gen_rtx_MULT (wider_mode, gen_rtx_ZERO_EXTEND (wider_mode, reg), gen_rtx_ZERO_EXTEND (wider_mode, reg)), SET); - mul_highpart_cost[(int) mode] + mul_highpart_cost[mode] = rtx_cost (gen_rtx_TRUNCATE (mode, gen_rtx_LSHIFTRT (wider_mode, @@ -196,6 +162,52 @@ init_expmed () GEN_INT (GET_MODE_BITSIZE (mode)))), SET); } + + shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_ASHIFT (mode, reg, + const0_rtx))); + + shiftadd_insn + = emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_PLUS (mode, + gen_rtx_MULT (mode, + reg, + const0_rtx), + reg))); + + shiftsub_insn + = emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_MINUS (mode, + gen_rtx_MULT (mode, + reg, + const0_rtx), + reg))); + + shift_pat = PATTERN (shift_insn); + shiftadd_pat = PATTERN (shiftadd_insn); + shiftsub_pat = PATTERN (shiftsub_insn); + + shift_cost[mode][0] = 0; + shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode]; + + n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode)); + for (m = 1; m < n; m++) + { + shift_cost[mode][m] = 32000; + XEXP (SET_SRC (shift_pat), 1) = cint[m]; + if (recog (shift_pat, shift_insn, &dummy) >= 0) + shift_cost[mode][m] = rtx_cost (SET_SRC (shift_pat), SET); + + shiftadd_cost[mode][m] = 32000; + XEXP (XEXP (SET_SRC (shiftadd_pat), 0), 1) = pow2[m]; + if (recog (shiftadd_pat, shiftadd_insn, &dummy) >= 0) + shiftadd_cost[mode][m] = rtx_cost (SET_SRC (shiftadd_pat), SET); + + shiftsub_cost[mode][m] = 32000; + XEXP (XEXP (SET_SRC (shiftsub_pat), 0), 1) = pow2[m]; + if (recog (shiftsub_pat, shiftsub_insn, &dummy) >= 0) + shiftsub_cost[mode][m] = rtx_cost (SET_SRC (shiftsub_pat), SET); + } } end_sequence (); @@ -206,9 +218,7 @@ init_expmed () useful if X is a CONST_INT. */ rtx -negate_rtx (mode, x) - enum machine_mode mode; - rtx x; +negate_rtx (enum machine_mode mode, rtx x) { rtx result = simplify_unary_operation (NEG, mode, x, mode); @@ -223,9 +233,7 @@ negate_rtx (mode, x) is false; else the mode of the specified operand. If OPNO is -1, all the caller cares about is whether the insn is available. */ enum machine_mode -mode_for_extraction (pattern, opno) - enum extraction_pattern pattern; - int opno; +mode_for_extraction (enum extraction_pattern pattern, int opno) { const struct insn_data *data; @@ -286,13 +294,9 @@ mode_for_extraction (pattern, opno) else, we use the mode of operand 3. */ rtx -store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) - rtx str_rtx; - unsigned HOST_WIDE_INT bitsize; - unsigned HOST_WIDE_INT bitnum; - enum machine_mode fieldmode; - rtx value; - HOST_WIDE_INT total_size; +store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode, + rtx value, HOST_WIDE_INT total_size) { unsigned int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD; @@ -324,6 +328,53 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) value = protect_from_queue (value, 0); + /* Use vec_extract patterns for extracting parts of vectors whenever + available. 
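
   Illustration (editorial sketch, not part of the patch; note the store
   side below actually consults the vec_set optab, not vec_extract). The
   element index is the same bitnum / inner-size division tested in the
   condition below, here modelled on a hypothetical V4HI vector packed
   into a uint64_t, lane 0 in the low bits:

     #include <assert.h>
     #include <stdint.h>

     static uint64_t vec_set_lane (uint64_t vec, uint16_t val, unsigned bitnum)
     {
       unsigned inner_bits = 16;
       assert (bitnum % inner_bits == 0);    // same alignment check as the patch
       unsigned pos = bitnum / inner_bits;   // element number, as in the patch
       uint64_t mask = (uint64_t) 0xffff << (pos * inner_bits);
       return (vec & ~mask) | ((uint64_t) val << (pos * inner_bits));
     }

     int main (void)
     {
       uint64_t v = vec_set_lane (0, 0xabcd, 32);   // store into element 2
       assert (((v >> 32) & 0xffff) == 0xabcd);
       return 0;
     }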
*/ + if (VECTOR_MODE_P (GET_MODE (op0)) + && GET_CODE (op0) != MEM + && (vec_set_optab->handlers[GET_MODE (op0)].insn_code + != CODE_FOR_nothing) + && fieldmode == GET_MODE_INNER (GET_MODE (op0)) + && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) + && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) + { + enum machine_mode outermode = GET_MODE (op0); + enum machine_mode innermode = GET_MODE_INNER (outermode); + int icode = (int) vec_set_optab->handlers[outermode].insn_code; + int pos = bitnum / GET_MODE_BITSIZE (innermode); + rtx rtxpos = GEN_INT (pos); + rtx src = value; + rtx dest = op0; + rtx pat, seq; + enum machine_mode mode0 = insn_data[icode].operand[0].mode; + enum machine_mode mode1 = insn_data[icode].operand[1].mode; + enum machine_mode mode2 = insn_data[icode].operand[2].mode; + + start_sequence (); + + if (! (*insn_data[icode].operand[1].predicate) (src, mode1)) + src = copy_to_mode_reg (mode1, src); + + if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)) + rtxpos = copy_to_mode_reg (mode1, rtxpos); + + /* We could handle this, but we should always be called with a pseudo + for our targets and all insns should take them as outputs. */ + if (! (*insn_data[icode].operand[0].predicate) (dest, mode0) + || ! (*insn_data[icode].operand[1].predicate) (src, mode1) + || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)) + abort (); + pat = GEN_FCN (icode) (dest, src, rtxpos); + seq = get_insns (); + end_sequence (); + if (pat) + { + emit_insn (seq); + emit_insn (pat); + return dest; + } + } + if (flag_force_mem) { int old_generating_concat_p = generating_concat_p; @@ -347,7 +398,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) && (GET_CODE (op0) != MEM ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode)) - && byte_offset % GET_MODE_SIZE (fieldmode) == 0) + && byte_offset % GET_MODE_SIZE (fieldmode) == 0) : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0)) || (offset * BITS_PER_UNIT % bitsize == 0 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0)))) @@ -366,7 +417,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) subregs results in Severe Tire Damage. */ abort (); } - if (GET_CODE (op0) == REG) + if (REG_P (op0)) op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset); else op0 = adjust_address (op0, fieldmode, offset); @@ -415,13 +466,13 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) if (GET_CODE (op0) != MEM && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0) && bitsize == GET_MODE_BITSIZE (fieldmode) - && (movstrict_optab->handlers[(int) fieldmode].insn_code + && (movstrict_optab->handlers[fieldmode].insn_code != CODE_FOR_nothing)) { - int icode = movstrict_optab->handlers[(int) fieldmode].insn_code; + int icode = movstrict_optab->handlers[fieldmode].insn_code; /* Get appropriate low part of the value being stored. */ - if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG) + if (GET_CODE (value) == CONST_INT || REG_P (value)) value = gen_lowpart (fieldmode, value); else if (!(GET_CODE (value) == SYMBOL_REF || GET_CODE (value) == LABEL_REF @@ -472,7 +523,9 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) VOIDmode, because that is what store_field uses to indicate that this is a bit field, but passing VOIDmode to operand_subword_force will result in an abort. 
*/ - fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT); + fieldmode = GET_MODE (value); + if (fieldmode == VOIDmode) + fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT); for (i = 0; i < nwords; i++) { @@ -488,10 +541,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) store_bit_field (op0, MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD), bitnum + bit_offset, word_mode, - operand_subword_force (value, wordnum, - (GET_MODE (value) == VOIDmode - ? fieldmode - : GET_MODE (value))), + operand_subword_force (value, wordnum, fieldmode), total_size); } return value; @@ -508,7 +558,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) if (offset != 0 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) { - if (GET_CODE (op0) != REG) + if (!REG_P (op0)) { /* Since this is a destination (lvalue), we can't copy it to a pseudo. We can trivially remove a SUBREG that does not @@ -535,7 +585,9 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) structure fields. */ if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT) - value = gen_lowpart (word_mode, value); + value = gen_lowpart ((GET_MODE (value) == VOIDmode + ? word_mode : int_mode_for_mode (GET_MODE (value))), + value); /* Now OFFSET is nonzero only if OP0 is memory and is therefore always measured in bytes. */ @@ -545,7 +597,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) && !(bitsize == 1 && GET_CODE (value) == CONST_INT) /* Ensure insv's size is wide enough for this field. */ && (GET_MODE_BITSIZE (op_mode) >= bitsize) - && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG) + && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))) { int xbitpos = bitpos; @@ -614,7 +666,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) /* We can't just change the mode, because this might clobber op0, and we will need the original value of op0 if insv fails. */ xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0)); - if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode) + if (REG_P (xop0) && GET_MODE (xop0) != maxmode) xop0 = gen_rtx_SUBREG (maxmode, xop0, 0); /* On big-endian machines, we count bits from the most significant. @@ -675,7 +727,7 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) if (pat) emit_insn (pat); else - { + { delete_insns_since (last); store_fixed_bit_field (op0, offset, bitsize, bitpos, value); } @@ -699,10 +751,9 @@ store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size) Note that protect_from_queue has already been done on OP0 and VALUE. */ static void -store_fixed_bit_field (op0, offset, bitsize, bitpos, value) - rtx op0; - unsigned HOST_WIDE_INT offset, bitsize, bitpos; - rtx value; +store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset, + unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitpos, rtx value) { enum machine_mode mode; unsigned int total_bits = BITS_PER_WORD; @@ -717,7 +768,7 @@ store_fixed_bit_field (op0, offset, bitsize, bitpos, value) and a field split across two bytes. Such cases are not supposed to be able to occur. 
*/ - if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG) + if (REG_P (op0) || GET_CODE (op0) == SUBREG) { if (offset != 0) abort (); @@ -737,8 +788,8 @@ store_fixed_bit_field (op0, offset, bitsize, bitpos, value) mode = GET_MODE (op0); if (GET_MODE_BITSIZE (mode) == 0 - || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode)) - mode = word_mode; + || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode)) + mode = word_mode; mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0)); @@ -815,7 +866,7 @@ store_fixed_bit_field (op0, offset, bitsize, bitpos, value) if (GET_MODE (value) != mode) { - if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG) + if ((REG_P (value) || GET_CODE (value) == SUBREG) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value))) value = gen_lowpart (mode, value); else @@ -834,7 +885,7 @@ store_fixed_bit_field (op0, offset, bitsize, bitpos, value) /* Now clear the chosen bits in OP0, except that if VALUE is -1 we need not bother. */ - subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0; + subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0; if (! all_one) { @@ -865,17 +916,15 @@ store_fixed_bit_field (op0, offset, bitsize, bitpos, value) This does not yet handle fields wider than BITS_PER_WORD. */ static void -store_split_bit_field (op0, bitsize, bitpos, value) - rtx op0; - unsigned HOST_WIDE_INT bitsize, bitpos; - rtx value; +store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitpos, rtx value) { unsigned int unit; unsigned int bitsdone = 0; /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that much at a time. */ - if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG) + if (REG_P (op0) || GET_CODE (op0) == SUBREG) unit = BITS_PER_WORD; else unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); @@ -964,7 +1013,7 @@ store_split_bit_field (op0, bitsize, bitpos, value) GET_MODE (SUBREG_REG (op0))); offset = 0; } - else if (GET_CODE (op0) == REG) + else if (REG_P (op0)) { word = operand_subword_force (op0, offset, GET_MODE (op0)); offset = 0; @@ -1001,15 +1050,10 @@ store_split_bit_field (op0, bitsize, bitpos, value) if they are equally easy. */ rtx -extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, - target, mode, tmode, total_size) - rtx str_rtx; - unsigned HOST_WIDE_INT bitsize; - unsigned HOST_WIDE_INT bitnum; - int unsignedp; - rtx target; - enum machine_mode mode, tmode; - HOST_WIDE_INT total_size; +extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, + enum machine_mode mode, enum machine_mode tmode, + HOST_WIDE_INT total_size) { unsigned int unit = (GET_CODE (str_rtx) == MEM) ? 
BITS_PER_UNIT : BITS_PER_WORD; @@ -1032,29 +1076,19 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, if (tmode == VOIDmode) tmode = mode; + while (GET_CODE (op0) == SUBREG) { - int outer_size = GET_MODE_BITSIZE (GET_MODE (op0)); - int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0))); - - offset += SUBREG_BYTE (op0) / UNITS_PER_WORD; - - inner_size = MIN (inner_size, BITS_PER_WORD); - - if (BYTES_BIG_ENDIAN && (outer_size < inner_size)) + bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT; + if (bitpos > unit) { - bitpos += inner_size - outer_size; - if (bitpos > unit) - { - offset += (bitpos / unit); - bitpos %= unit; - } + offset += (bitpos / unit); + bitpos %= unit; } - op0 = SUBREG_REG (op0); } - if (GET_CODE (op0) == REG + if (REG_P (op0) && mode == GET_MODE (op0) && bitnum == 0 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) @@ -1063,6 +1097,61 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, return op0; } + /* Use vec_extract patterns for extracting parts of vectors whenever + available. */ + if (VECTOR_MODE_P (GET_MODE (op0)) + && GET_CODE (op0) != MEM + && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code + != CODE_FOR_nothing) + && ((bitsize + bitnum) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) + == bitsize / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) + { + enum machine_mode outermode = GET_MODE (op0); + enum machine_mode innermode = GET_MODE_INNER (outermode); + int icode = (int) vec_extract_optab->handlers[outermode].insn_code; + int pos = bitnum / GET_MODE_BITSIZE (innermode); + rtx rtxpos = GEN_INT (pos); + rtx src = op0; + rtx dest = NULL, pat, seq; + enum machine_mode mode0 = insn_data[icode].operand[0].mode; + enum machine_mode mode1 = insn_data[icode].operand[1].mode; + enum machine_mode mode2 = insn_data[icode].operand[2].mode; + + if (innermode == tmode || innermode == mode) + dest = target; + + if (!dest) + dest = gen_reg_rtx (innermode); + + start_sequence (); + + if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)) + dest = copy_to_mode_reg (mode0, dest); + + if (! (*insn_data[icode].operand[1].predicate) (src, mode1)) + src = copy_to_mode_reg (mode1, src); + + if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)) + rtxpos = copy_to_mode_reg (mode1, rtxpos); + + /* We could handle this, but we should always be called with a pseudo + for our targets and all insns should take them as outputs. */ + if (! (*insn_data[icode].operand[0].predicate) (dest, mode0) + || ! (*insn_data[icode].operand[1].predicate) (src, mode1) + || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)) + abort (); + + pat = GEN_FCN (icode) (dest, src, rtxpos); + seq = get_insns (); + end_sequence (); + if (pat) + { + emit_insn (seq); + emit_insn (pat); + return dest; + } + } + /* Make sure we are playing with integral modes. Pun with subregs if we aren't. */ { @@ -1087,9 +1176,13 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, set_mem_expr (op0, 0); } - /* ??? We currently assume TARGET is at least as big as BITSIZE. - If that's wrong, the solution is to test for it and set TARGET to 0 - if needed. */ + /* Extraction of a full-word or multi-word value from a structure + in a register or aligned memory can be done with just a SUBREG. + A subword value in the least significant part of a register + can also be extracted with a SUBREG. For this, we need the + byte offset of the value in op0. */ + + byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD; /* If OP0 is a register, BITPOS must count within a word. 
But as we have it, it counts within whatever size OP0 now has. @@ -1099,38 +1192,38 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, && unit > GET_MODE_BITSIZE (GET_MODE (op0))) bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0)); - /* Extracting a full-word or multi-word value - from a structure in a register or aligned memory. - This can be done with just SUBREG. - So too extracting a subword value in - the least significant part of the register. */ - - byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT - + (offset * UNITS_PER_WORD); + /* ??? We currently assume TARGET is at least as big as BITSIZE. + If that's wrong, the solution is to test for it and set TARGET to 0 + if needed. */ - mode1 = (VECTOR_MODE_P (tmode) - ? mode - : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)); - - if (((GET_CODE (op0) != MEM - && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode), - GET_MODE_BITSIZE (GET_MODE (op0))) - && GET_MODE_SIZE (mode1) != 0 - && byte_offset % GET_MODE_SIZE (mode1) == 0) - || (GET_CODE (op0) == MEM - && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0)) - || (offset * BITS_PER_UNIT % bitsize == 0 - && MEM_ALIGN (op0) % bitsize == 0)))) - && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode) - && bitpos % BITS_PER_WORD == 0) - || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode - /* ??? The big endian test here is wrong. This is correct - if the value is in a register, and if mode_for_size is not - the same mode as op0. This causes us to get unnecessarily - inefficient code from the Thumb port when -mbig-endian. */ - && (BYTES_BIG_ENDIAN - ? bitpos + bitsize == BITS_PER_WORD - : bitpos == 0)))) + /* Only scalar integer modes can be converted via subregs. There is an + additional problem for FP modes here in that they can have a precision + which is different from the size. mode_for_size uses precision, but + we want a mode based on the size, so we must avoid calling it for FP + modes. */ + mode1 = (SCALAR_INT_MODE_P (tmode) + ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) + : mode); + + if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode) + && bitpos % BITS_PER_WORD == 0) + || (mode1 != BLKmode + /* ??? The big endian test here is wrong. This is correct + if the value is in a register, and if mode_for_size is not + the same mode as op0. This causes us to get unnecessarily + inefficient code from the Thumb port when -mbig-endian. */ + && (BYTES_BIG_ENDIAN + ? bitpos + bitsize == BITS_PER_WORD + : bitpos == 0))) + && ((GET_CODE (op0) != MEM + && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode), + GET_MODE_BITSIZE (GET_MODE (op0))) + && GET_MODE_SIZE (mode1) != 0 + && byte_offset % GET_MODE_SIZE (mode1) == 0) + || (GET_CODE (op0) == MEM + && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0)) + || (offset * BITS_PER_UNIT % bitsize == 0 + && MEM_ALIGN (op0) % bitsize == 0))))) { if (mode1 != GET_MODE (op0)) { @@ -1144,9 +1237,9 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, /* Else we've got some float mode source being extracted into a different float mode destination -- this combination of subregs results in Severe Tire Damage. */ - abort (); + goto no_subreg_mode_swap; } - if (GET_CODE (op0) == REG) + if (REG_P (op0)) op0 = gen_rtx_SUBREG (mode1, op0, byte_offset); else op0 = adjust_address (op0, mode1, offset); @@ -1155,6 +1248,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, return convert_to_mode (tmode, op0, unsignedp); return op0; } + no_subreg_mode_swap: /* Handle fields bigger than a word. 
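
   Illustration (editorial sketch, not part of the patch): a field bigger
   than a word is gathered one word at a time, as the loop below does with
   a subword extract_bit_field per iteration; here a word-aligned 64-bit
   field from 32-bit words, little-endian word order assumed:

     #include <assert.h>
     #include <stdint.h>

     static uint64_t extract_two_words (const uint32_t *src, unsigned bitnum)
     {
       unsigned first = bitnum / 32;
       assert (bitnum % 32 == 0);    // word-aligned field, for simplicity
       return (uint64_t) src[first] | ((uint64_t) src[first + 1] << 32);
     }

     int main (void)
     {
       uint32_t buf[3] = { 0xdeadbeefu, 0x55667788u, 0x11223344u };
       assert (extract_two_words (buf, 32) == 0x1122334455667788ull);
       return 0;
     }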
*/ @@ -1168,7 +1262,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; unsigned int i; - if (target == 0 || GET_CODE (target) != REG) + if (target == 0 || !REG_P (target)) target = gen_reg_rtx (mode); /* Indicate for flow that the entire target reg is being set. */ @@ -1252,7 +1346,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, if (offset != 0 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) { - if (GET_CODE (op0) != REG) + if (!REG_P (op0)) op0 = copy_to_reg (op0); op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0), op0, (offset * UNITS_PER_WORD)); @@ -1268,7 +1362,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, { if (HAVE_extzv && (GET_MODE_BITSIZE (extzv_mode) >= bitsize) - && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG) + && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode)))) { unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset; @@ -1336,7 +1430,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, SImode). to make it acceptable to the format of extzv. */ if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode) goto extzv_loses; - if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode) + if (REG_P (xop0) && GET_MODE (xop0) != maxmode) xop0 = gen_rtx_SUBREG (maxmode, xop0, 0); /* On big-endian machines, we count bits from the most significant. @@ -1356,7 +1450,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, if (GET_MODE (xtarget) != maxmode) { - if (GET_CODE (xtarget) == REG) + if (REG_P (xtarget)) { int wider = (GET_MODE_SIZE (maxmode) > GET_MODE_SIZE (GET_MODE (xtarget))); @@ -1402,7 +1496,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, { if (HAVE_extv && (GET_MODE_BITSIZE (extv_mode) >= bitsize) - && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG) + && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode)))) { int xbitpos = bitpos, xoffset = offset; @@ -1464,7 +1558,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, SImode) to make it acceptable to the format of extv. */ if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode) goto extv_loses; - if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode) + if (REG_P (xop0) && GET_MODE (xop0) != maxmode) xop0 = gen_rtx_SUBREG (maxmode, xop0, 0); /* On big-endian machines, we count bits from the most significant. @@ -1485,7 +1579,7 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, if (GET_MODE (xtarget) != maxmode) { - if (GET_CODE (xtarget) == REG) + if (REG_P (xtarget)) { int wider = (GET_MODE_SIZE (maxmode) > GET_MODE_SIZE (GET_MODE (xtarget))); @@ -1567,17 +1661,16 @@ extract_bit_field (str_rtx, bitsize, bitnum, unsignedp, If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */ static rtx -extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos, - target, unsignedp) - enum machine_mode tmode; - rtx op0, target; - unsigned HOST_WIDE_INT offset, bitsize, bitpos; - int unsignedp; +extract_fixed_bit_field (enum machine_mode tmode, rtx op0, + unsigned HOST_WIDE_INT offset, + unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitpos, rtx target, + int unsignedp) { unsigned int total_bits = BITS_PER_WORD; enum machine_mode mode; - if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG) + if (GET_CODE (op0) == SUBREG || REG_P (op0)) { /* Special treatment for a bit field split across two registers. 
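
   Illustration (editorial sketch, not part of the patch): the split case
   combines low bits of the field from the first word with high bits from
   the second, as extract_split_bit_field does; unsigned (zero-extending)
   flavor on 32-bit words:

     #include <assert.h>
     #include <stdint.h>

     static uint32_t extract_split_32 (uint32_t w0, uint32_t w1,
                                       unsigned bitpos, unsigned bitsize)
     {
       uint32_t mask = bitsize == 32 ? 0xffffffffu : (1u << bitsize) - 1;
       assert (bitpos < 32 && bitpos + bitsize > 32 && bitsize <= 32);
       return ((w0 >> bitpos) | (w1 << (32 - bitpos))) & mask;
     }

     int main (void)
     {
       // 8-bit field at bit 28: 4 bits from w0's top, 4 from w1's bottom.
       assert (extract_split_32 (0xf0000000u, 0x0000000au, 28, 8) == 0xaf);
       return 0;
     }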
*/ if (bitsize + bitpos > BITS_PER_WORD) @@ -1639,9 +1732,7 @@ extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos, tree amount = build_int_2 (bitpos, 0); /* Maybe propagate the target for the shift. */ /* But not if we will return it--could confuse integrate.c. */ - rtx subtarget = (target != 0 && GET_CODE (target) == REG - && !REG_FUNCTION_VALUE_P (target) - ? target : 0); + rtx subtarget = (target != 0 && REG_P (target) ? target : 0); if (tmode != mode) subtarget = 0; op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1); } @@ -1680,10 +1771,7 @@ extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos, tree amount = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0); /* Maybe propagate the target for the shift. */ - /* But not if we will return the result--could confuse integrate.c. */ - rtx subtarget = (target != 0 && GET_CODE (target) == REG - && ! REG_FUNCTION_VALUE_P (target) - ? target : 0); + rtx subtarget = (target != 0 && REG_P (target) ? target : 0); op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1); } @@ -1699,13 +1787,13 @@ extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos, BITSIZE+BITPOS is too small for MODE. */ static rtx -mask_rtx (mode, bitpos, bitsize, complement) - enum machine_mode mode; - int bitpos, bitsize, complement; +mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement) { HOST_WIDE_INT masklow, maskhigh; - if (bitpos < HOST_BITS_PER_WIDE_INT) + if (bitsize == 0) + masklow = 0; + else if (bitpos < HOST_BITS_PER_WIDE_INT) masklow = (HOST_WIDE_INT) -1 << bitpos; else masklow = 0; @@ -1719,7 +1807,9 @@ mask_rtx (mode, bitpos, bitsize, complement) else maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT); - if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT) + if (bitsize == 0) + maskhigh = 0; + else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT) maskhigh &= ((unsigned HOST_WIDE_INT) -1 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize)); else @@ -1738,10 +1828,7 @@ mask_rtx (mode, bitpos, bitsize, complement) VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */ static rtx -lshift_value (mode, value, bitpos, bitsize) - enum machine_mode mode; - rtx value; - int bitpos, bitsize; +lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize) { unsigned HOST_WIDE_INT v = INTVAL (value); HOST_WIDE_INT low, high; @@ -1771,10 +1858,8 @@ lshift_value (mode, value, bitpos, bitsize) UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */ static rtx -extract_split_bit_field (op0, bitsize, bitpos, unsignedp) - rtx op0; - unsigned HOST_WIDE_INT bitsize, bitpos; - int unsignedp; +extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, + unsigned HOST_WIDE_INT bitpos, int unsignedp) { unsigned int unit; unsigned int bitsdone = 0; @@ -1783,7 +1868,7 @@ extract_split_bit_field (op0, bitsize, bitpos, unsignedp) /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that much at a time. */ - if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG) + if (REG_P (op0) || GET_CODE (op0) == SUBREG) unit = BITS_PER_WORD; else unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); @@ -1817,7 +1902,7 @@ extract_split_bit_field (op0, bitsize, bitpos, unsignedp) GET_MODE (SUBREG_REG (op0))); offset = 0; } - else if (GET_CODE (op0) == REG) + else if (REG_P (op0)) { word = operand_subword_force (op0, offset, GET_MODE (op0)); offset = 0; @@ -1873,8 +1958,7 @@ extract_split_bit_field (op0, bitsize, bitpos, unsignedp) /* Add INC into TARGET. 
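
   Illustration (editorial sketch, not part of the patch): the mask_rtx
   hunk above adds the bitsize == 0 case, which must yield an all-zero
   mask rather than all ones. The same two-halves construction over a
   pair of 64-bit words (bitpos < 128 assumed):

     #include <assert.h>
     #include <stdint.h>

     static void mask_2x64 (int bitpos, int bitsize,
                            uint64_t *masklow, uint64_t *maskhigh)
     {
       // Low half: ones from bitpos up, trimmed at bitpos + bitsize.
       if (bitsize == 0)
         *masklow = 0;
       else if (bitpos < 64)
         *masklow = ~0ull << bitpos;
       else
         *masklow = 0;
       if (bitpos + bitsize < 64)
         *masklow &= ~(~0ull << (bitpos + bitsize));

       // High half, with the same bitsize == 0 guard the patch adds.
       *maskhigh = bitpos <= 64 ? ~0ull : ~0ull << (bitpos - 64);
       if (bitsize == 0)
         *maskhigh = 0;
       else if (bitpos + bitsize > 64 && bitpos + bitsize < 128)
         *maskhigh &= ~0ull >> (128 - bitpos - bitsize);
       else if (bitpos + bitsize <= 64)
         *maskhigh = 0;
     }

     int main (void)
     {
       uint64_t lo, hi;
       mask_2x64 (60, 8, &lo, &hi);   // a field straddling the halves
       assert (lo == 0xf000000000000000ull && hi == 0xfull);
       mask_2x64 (0, 0, &lo, &hi);    // the newly handled empty field
       assert (lo == 0 && hi == 0);
       return 0;
     }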
*/ void -expand_inc (target, inc) - rtx target, inc; +expand_inc (rtx target, rtx inc) { rtx value = expand_binop (GET_MODE (target), add_optab, target, inc, @@ -1886,8 +1970,7 @@ expand_inc (target, inc) /* Subtract DEC from TARGET. */ void -expand_dec (target, dec) - rtx target, dec; +expand_dec (rtx target, rtx dec) { rtx value = expand_binop (GET_MODE (target), sub_optab, target, dec, @@ -1904,13 +1987,8 @@ expand_dec (target, dec) Return the rtx for where the value is. */ rtx -expand_shift (code, mode, shifted, amount, target, unsignedp) - enum tree_code code; - enum machine_mode mode; - rtx shifted; - tree amount; - rtx target; - int unsignedp; +expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted, + tree amount, rtx target, int unsignedp) { rtx op1, temp = 0; int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR); @@ -1923,19 +2001,17 @@ expand_shift (code, mode, shifted, amount, target, unsignedp) op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0); -#ifdef SHIFT_COUNT_TRUNCATED if (SHIFT_COUNT_TRUNCATED) { if (GET_CODE (op1) == CONST_INT - && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= + && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode))) - op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1) + op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1) % GET_MODE_BITSIZE (mode)); else if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1)) op1 = SUBREG_REG (op1); } -#endif if (op1 == const0_rtx) return shifted; @@ -2082,30 +2158,40 @@ struct algorithm char log[MAX_BITS_PER_WORD]; }; -static void synth_mult PARAMS ((struct algorithm *, - unsigned HOST_WIDE_INT, - int)); -static unsigned HOST_WIDE_INT choose_multiplier PARAMS ((unsigned HOST_WIDE_INT, - int, int, - unsigned HOST_WIDE_INT *, - int *, int *)); -static unsigned HOST_WIDE_INT invert_mod2n PARAMS ((unsigned HOST_WIDE_INT, - int)); +/* Indicates the type of fixup needed after a constant multiplication. + BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that + the result should be negated, and ADD_VARIANT means that the + multiplicand should be added to the result. */ +enum mult_variant {basic_variant, negate_variant, add_variant}; + +static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, + int, enum machine_mode mode); +static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT, + struct algorithm *, enum mult_variant *, int); +static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx, + const struct algorithm *, enum mult_variant); +static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int, + int, unsigned HOST_WIDE_INT *, + int *, int *); +static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); +static rtx extract_high_half (enum machine_mode, rtx); +static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx, + int, int); /* Compute and return the best algorithm for multiplying by T. The algorithm must cost less than cost_limit If retval.cost >= COST_LIMIT, no algorithm was found and all - other field of the returned struct are undefined. */ + other field of the returned struct are undefined. + MODE is the machine mode of the multiplication. 
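
   Illustration (editorial sketch, not part of the patch): one sequence
   synth_mult can find for t = 10, strip the low zero bit and use 5 = 4 + 1:

     #include <assert.h>
     #include <stdint.h>

     static uint32_t times10 (uint32_t x)
     {
       uint32_t accum = (x << 2) + x;   // alg_add_t_m2: x*5
       return accum << 1;               // alg_shift:    x*10
     }

     int main (void)
     {
       uint32_t x;
       for (x = 0; x < 1000; x++)
         assert (times10 (x) == x * 10);
       return 0;
     }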
*/ static void -synth_mult (alg_out, t, cost_limit) - struct algorithm *alg_out; - unsigned HOST_WIDE_INT t; - int cost_limit; +synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, + int cost_limit, enum machine_mode mode) { int m; struct algorithm *alg_in, *best_alg; int cost; unsigned HOST_WIDE_INT q; + int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode)); /* Indicate that no algorithm is yet found. If no algorithm is found, this value will be returned and indicate failure. */ @@ -2114,6 +2200,9 @@ synth_mult (alg_out, t, cost_limit) if (cost_limit <= 0) return; + /* Restrict the bits of "t" to the multiplication's mode. */ + t &= GET_MODE_MASK (mode); + /* t == 1 can be done in zero cost. */ if (t == 1) { @@ -2140,8 +2229,8 @@ synth_mult (alg_out, t, cost_limit) /* We'll be needing a couple extra algorithm structures now. */ - alg_in = (struct algorithm *)alloca (sizeof (struct algorithm)); - best_alg = (struct algorithm *)alloca (sizeof (struct algorithm)); + alg_in = alloca (sizeof (struct algorithm)); + best_alg = alloca (sizeof (struct algorithm)); /* If we have a group of zero bits at the low-order part of T, try multiplying by the remaining bits and then doing a shift. */ @@ -2149,11 +2238,11 @@ synth_mult (alg_out, t, cost_limit) if ((t & 1) == 0) { m = floor_log2 (t & -t); /* m = number of low zero bits */ - if (m < BITS_PER_WORD) + if (m < maxm) { q = t >> m; - cost = shift_cost[m]; - synth_mult (alg_in, q, cost_limit - cost); + cost = shift_cost[mode][m]; + synth_mult (alg_in, q, cost_limit - cost, mode); cost += alg_in->cost; if (cost < cost_limit) @@ -2187,8 +2276,8 @@ synth_mult (alg_out, t, cost_limit) { /* T ends with ...111. Multiply by (T + 1) and subtract 1. */ - cost = add_cost; - synth_mult (alg_in, t + 1, cost_limit - cost); + cost = add_cost[mode]; + synth_mult (alg_in, t + 1, cost_limit - cost, mode); cost += alg_in->cost; if (cost < cost_limit) @@ -2204,8 +2293,8 @@ synth_mult (alg_out, t, cost_limit) { /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. 
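
   Illustration (editorial sketch, not part of the patch): the two cases
   above, concretely, on 32-bit operands:

     #include <assert.h>
     #include <stdint.h>

     static uint32_t times7 (uint32_t x) { return (x << 3) - x; }  // 7+1 = 8
     static uint32_t times9 (uint32_t x) { return (x << 3) + x; }  // 9-1 = 8

     int main (void)
     {
       uint32_t x;
       for (x = 0; x < 1000; x++)
         {
           assert (times7 (x) == x * 7);   // ...111 case: by 8, then subtract
           assert (times9 (x) == x * 9);   // ...001 case: by 8, then add
         }
       return 0;
     }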
*/ - cost = add_cost; - synth_mult (alg_in, t - 1, cost_limit - cost); + cost = add_cost[mode]; + synth_mult (alg_in, t - 1, cost_limit - cost, mode); cost += alg_in->cost; if (cost < cost_limit) @@ -2234,10 +2323,12 @@ synth_mult (alg_out, t, cost_limit) unsigned HOST_WIDE_INT d; d = ((unsigned HOST_WIDE_INT) 1 << m) + 1; - if (t % d == 0 && t > d && m < BITS_PER_WORD) + if (t % d == 0 && t > d && m < maxm) { - cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]); - synth_mult (alg_in, t / d, cost_limit - cost); + cost = add_cost[mode] + shift_cost[mode][m]; + if (shiftadd_cost[mode][m] < cost) + cost = shiftadd_cost[mode][m]; + synth_mult (alg_in, t / d, cost_limit - cost, mode); cost += alg_in->cost; if (cost < cost_limit) @@ -2253,10 +2344,12 @@ synth_mult (alg_out, t, cost_limit) } d = ((unsigned HOST_WIDE_INT) 1 << m) - 1; - if (t % d == 0 && t > d && m < BITS_PER_WORD) + if (t % d == 0 && t > d && m < maxm) { - cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]); - synth_mult (alg_in, t / d, cost_limit - cost); + cost = add_cost[mode] + shift_cost[mode][m]; + if (shiftsub_cost[mode][m] < cost) + cost = shiftsub_cost[mode][m]; + synth_mult (alg_in, t / d, cost_limit - cost, mode); cost += alg_in->cost; if (cost < cost_limit) @@ -2278,10 +2371,10 @@ synth_mult (alg_out, t, cost_limit) q = t - 1; q = q & -q; m = exact_log2 (q); - if (m >= 0 && m < BITS_PER_WORD) + if (m >= 0 && m < maxm) { - cost = shiftadd_cost[m]; - synth_mult (alg_in, (t - 1) >> m, cost_limit - cost); + cost = shiftadd_cost[mode][m]; + synth_mult (alg_in, (t - 1) >> m, cost_limit - cost, mode); cost += alg_in->cost; if (cost < cost_limit) @@ -2297,10 +2390,10 @@ synth_mult (alg_out, t, cost_limit) q = t + 1; q = q & -q; m = exact_log2 (q); - if (m >= 0 && m < BITS_PER_WORD) + if (m >= 0 && m < maxm) { - cost = shiftsub_cost[m]; - synth_mult (alg_in, (t + 1) >> m, cost_limit - cost); + cost = shiftsub_cost[mode][m]; + synth_mult (alg_in, (t + 1) >> m, cost_limit - cost, mode); cost += alg_in->cost; if (cost < cost_limit) @@ -2335,6 +2428,198 @@ synth_mult (alg_out, t, cost_limit) alg_out->ops * sizeof *alg_out->log); } +/* Find the cheapest way of multiplying a value of mode MODE by VAL. + Try three variations: + + - a shift/add sequence based on VAL itself + - a shift/add sequence based on -VAL, followed by a negation + - a shift/add sequence based on VAL - 1, followed by an addition. + + Return true if the cheapest of these cost less than MULT_COST, + describing the algorithm in *ALG and final fixup in *VARIANT. */ + +static bool +choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, + struct algorithm *alg, enum mult_variant *variant, + int mult_cost) +{ + struct algorithm alg2; + + *variant = basic_variant; + synth_mult (alg, val, mult_cost, mode); + + /* This works only if the inverted value actually fits in an + `unsigned int' */ + if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode)) + { + synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - neg_cost[mode], + mode); + alg2.cost += neg_cost[mode]; + if (alg2.cost < alg->cost) + *alg = alg2, *variant = negate_variant; + } + + /* This proves very useful for division-by-constant. */ + synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost[mode], + mode); + alg2.cost += add_cost[mode]; + if (alg2.cost < alg->cost) + *alg = alg2, *variant = add_variant; + + return alg->cost < mult_cost; +} + +/* A subroutine of expand_mult, used for constant multiplications. + Multiply OP0 by VAL in mode MODE, storing the result in TARGET if + convenient. 
Use the shift/add sequence described by ALG and apply + the final fixup specified by VARIANT. */ + +static rtx +expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val, + rtx target, const struct algorithm *alg, + enum mult_variant variant) +{ + HOST_WIDE_INT val_so_far; + rtx insn, accum, tem; + int opno; + enum machine_mode nmode; + + /* op0 must be register to make mult_cost match the precomputed + shiftadd_cost array. */ + op0 = protect_from_queue (op0, 0); + + /* Avoid referencing memory over and over. + For speed, but also for correctness when mem is volatile. */ + if (GET_CODE (op0) == MEM) + op0 = force_reg (mode, op0); + + /* ACCUM starts out either as OP0 or as a zero, depending on + the first operation. */ + + if (alg->op[0] == alg_zero) + { + accum = copy_to_mode_reg (mode, const0_rtx); + val_so_far = 0; + } + else if (alg->op[0] == alg_m) + { + accum = copy_to_mode_reg (mode, op0); + val_so_far = 1; + } + else + abort (); + + for (opno = 1; opno < alg->ops; opno++) + { + int log = alg->log[opno]; + int preserve = preserve_subexpressions_p (); + rtx shift_subtarget = preserve ? 0 : accum; + rtx add_target + = (opno == alg->ops - 1 && target != 0 && variant != add_variant + && ! preserve) + ? target : 0; + rtx accum_target = preserve ? 0 : accum; + + switch (alg->op[opno]) + { + case alg_shift: + accum = expand_shift (LSHIFT_EXPR, mode, accum, + build_int_2 (log, 0), NULL_RTX, 0); + val_so_far <<= log; + break; + + case alg_add_t_m2: + tem = expand_shift (LSHIFT_EXPR, mode, op0, + build_int_2 (log, 0), NULL_RTX, 0); + accum = force_operand (gen_rtx_PLUS (mode, accum, tem), + add_target ? add_target : accum_target); + val_so_far += (HOST_WIDE_INT) 1 << log; + break; + + case alg_sub_t_m2: + tem = expand_shift (LSHIFT_EXPR, mode, op0, + build_int_2 (log, 0), NULL_RTX, 0); + accum = force_operand (gen_rtx_MINUS (mode, accum, tem), + add_target ? add_target : accum_target); + val_so_far -= (HOST_WIDE_INT) 1 << log; + break; + + case alg_add_t2_m: + accum = expand_shift (LSHIFT_EXPR, mode, accum, + build_int_2 (log, 0), shift_subtarget, + 0); + accum = force_operand (gen_rtx_PLUS (mode, accum, op0), + add_target ? add_target : accum_target); + val_so_far = (val_so_far << log) + 1; + break; + + case alg_sub_t2_m: + accum = expand_shift (LSHIFT_EXPR, mode, accum, + build_int_2 (log, 0), shift_subtarget, 0); + accum = force_operand (gen_rtx_MINUS (mode, accum, op0), + add_target ? add_target : accum_target); + val_so_far = (val_so_far << log) - 1; + break; + + case alg_add_factor: + tem = expand_shift (LSHIFT_EXPR, mode, accum, + build_int_2 (log, 0), NULL_RTX, 0); + accum = force_operand (gen_rtx_PLUS (mode, accum, tem), + add_target ? add_target : accum_target); + val_so_far += val_so_far << log; + break; + + case alg_sub_factor: + tem = expand_shift (LSHIFT_EXPR, mode, accum, + build_int_2 (log, 0), NULL_RTX, 0); + accum = force_operand (gen_rtx_MINUS (mode, tem, accum), + (add_target ? add_target + : preserve ? 0 : tem)); + val_so_far = (val_so_far << log) - val_so_far; + break; + + default: + abort (); + } + + /* Write a REG_EQUAL note on the last insn so that we can cse + multiplication sequences. Note that if ACCUM is a SUBREG, + we've set the inner register and must properly indicate + that. 
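
   Illustration (editorial sketch, not part of the patch): the loop above
   shadows the constant in val_so_far precisely so the REG_EQUAL note, and
   the final val != val_so_far check, can be stated. Tracing t = 45 through
   one plausible sequence:

     #include <assert.h>
     #include <stdint.h>

     int main (void)
     {
       uint32_t x = 123456789u;              // arbitrary input
       uint32_t accum = x;                   // alg_m
       int64_t val_so_far = 1;

       accum = (accum << 2) + x;             // alg_add_t2_m, log = 2
       val_so_far = (val_so_far << 2) + 1;   // now 5

       accum += accum << 3;                  // alg_add_factor, log = 3
       val_so_far += val_so_far << 3;        // now 45

       assert (val_so_far == 45);            // mirrors the final sanity check
       assert (accum == x * 45u);
       return 0;
     }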
*/ + + tem = op0, nmode = mode; + if (GET_CODE (accum) == SUBREG) + { + nmode = GET_MODE (SUBREG_REG (accum)); + tem = gen_lowpart (nmode, op0); + } + + insn = get_last_insn (); + set_unique_reg_note (insn, REG_EQUAL, + gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far))); + } + + if (variant == negate_variant) + { + val_so_far = -val_so_far; + accum = expand_unop (mode, neg_optab, accum, target, 0); + } + else if (variant == add_variant) + { + val_so_far = val_so_far + 1; + accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target); + } + + /* Compare only the bits of val and val_so_far that are significant + in the result mode, to avoid sign-/zero-extension confusion. */ + val &= GET_MODE_MASK (mode); + val_so_far &= GET_MODE_MASK (mode); + if (val != val_so_far) + abort (); + + return accum; +} + /* Perform a multiplication and return an rtx for the result. MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); TARGET is a suggestion for where to store the result (an rtx). @@ -2344,12 +2629,12 @@ synth_mult (alg_out, t, cost_limit) you should swap the two operands if OP0 would be constant. */ rtx -expand_mult (mode, op0, op1, target, unsignedp) - enum machine_mode mode; - rtx op0, op1, target; - int unsignedp; +expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, + int unsignedp) { rtx const_op1 = op1; + enum mult_variant variant; + struct algorithm algorithm; /* synth_mult does an `unsigned int' multiply. As long as the mode is less than or equal in size to `unsigned int' this doesn't matter. @@ -2376,188 +2661,36 @@ expand_mult (mode, op0, op1, target, unsignedp) that it seems better to use synth_mult always. */ if (const_op1 && GET_CODE (const_op1) == CONST_INT - && (unsignedp || ! flag_trapv)) + && (unsignedp || !flag_trapv)) { - struct algorithm alg; - struct algorithm alg2; - HOST_WIDE_INT val = INTVAL (op1); - HOST_WIDE_INT val_so_far; - rtx insn; - int mult_cost; - enum {basic_variant, negate_variant, add_variant} variant = basic_variant; - - /* op0 must be register to make mult_cost match the precomputed - shiftadd_cost array. */ - op0 = force_reg (mode, op0); - - /* Try to do the computation three ways: multiply by the negative of OP1 - and then negate, do the multiplication directly, or do multiplication - by OP1 - 1. */ - - mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET); - mult_cost = MIN (12 * add_cost, mult_cost); - - synth_mult (&alg, val, mult_cost); - - /* This works only if the inverted value actually fits in an - `unsigned int' */ - if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode)) - { - synth_mult (&alg2, - val, - (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost); - if (alg2.cost + negate_cost < alg.cost) - alg = alg2, variant = negate_variant; - } - - /* This proves very useful for division-by-constant. */ - synth_mult (&alg2, val - 1, - (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost); - if (alg2.cost + add_cost < alg.cost) - alg = alg2, variant = add_variant; - - if (alg.cost < mult_cost) - { - /* We found something cheaper than a multiply insn. */ - int opno; - rtx accum, tem; - enum machine_mode nmode; - - op0 = protect_from_queue (op0, 0); - - /* Avoid referencing memory over and over. - For speed, but also for correctness when mem is volatile. */ - if (GET_CODE (op0) == MEM) - op0 = force_reg (mode, op0); - - /* ACCUM starts out either as OP0 or as a zero, depending on - the first operation. 
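
   Illustration (editorial sketch, not part of the patch): the two fixups
   applied after the shift/add sequence in the new code above, negate_variant
   computes x * -9 as -(x * 9), and add_variant reaches x * 10 from the
   cheaper sequence for 9 plus one add:

     #include <assert.h>
     #include <stdint.h>

     static uint32_t times_minus9 (uint32_t x) { return -((x << 3) + x); }
     static uint32_t times10_addv (uint32_t x) { return ((x << 3) + x) + x; }

     int main (void)
     {
       uint32_t x;
       for (x = 0; x < 1000; x++)
         {
           assert (times_minus9 (x) == x * (uint32_t) -9);  // wraps mod 2^32
           assert (times10_addv (x) == x * 10);
         }
       return 0;
     }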
*/ - - if (alg.op[0] == alg_zero) - { - accum = copy_to_mode_reg (mode, const0_rtx); - val_so_far = 0; - } - else if (alg.op[0] == alg_m) - { - accum = copy_to_mode_reg (mode, op0); - val_so_far = 1; - } - else - abort (); + int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET); + mult_cost = MIN (12 * add_cost[mode], mult_cost); - for (opno = 1; opno < alg.ops; opno++) - { - int log = alg.log[opno]; - int preserve = preserve_subexpressions_p (); - rtx shift_subtarget = preserve ? 0 : accum; - rtx add_target - = (opno == alg.ops - 1 && target != 0 && variant != add_variant - && ! preserve) - ? target : 0; - rtx accum_target = preserve ? 0 : accum; - - switch (alg.op[opno]) - { - case alg_shift: - accum = expand_shift (LSHIFT_EXPR, mode, accum, - build_int_2 (log, 0), NULL_RTX, 0); - val_so_far <<= log; - break; - - case alg_add_t_m2: - tem = expand_shift (LSHIFT_EXPR, mode, op0, - build_int_2 (log, 0), NULL_RTX, 0); - accum = force_operand (gen_rtx_PLUS (mode, accum, tem), - add_target - ? add_target : accum_target); - val_so_far += (HOST_WIDE_INT) 1 << log; - break; - - case alg_sub_t_m2: - tem = expand_shift (LSHIFT_EXPR, mode, op0, - build_int_2 (log, 0), NULL_RTX, 0); - accum = force_operand (gen_rtx_MINUS (mode, accum, tem), - add_target - ? add_target : accum_target); - val_so_far -= (HOST_WIDE_INT) 1 << log; - break; - - case alg_add_t2_m: - accum = expand_shift (LSHIFT_EXPR, mode, accum, - build_int_2 (log, 0), shift_subtarget, - 0); - accum = force_operand (gen_rtx_PLUS (mode, accum, op0), - add_target - ? add_target : accum_target); - val_so_far = (val_so_far << log) + 1; - break; - - case alg_sub_t2_m: - accum = expand_shift (LSHIFT_EXPR, mode, accum, - build_int_2 (log, 0), shift_subtarget, - 0); - accum = force_operand (gen_rtx_MINUS (mode, accum, op0), - add_target - ? add_target : accum_target); - val_so_far = (val_so_far << log) - 1; - break; - - case alg_add_factor: - tem = expand_shift (LSHIFT_EXPR, mode, accum, - build_int_2 (log, 0), NULL_RTX, 0); - accum = force_operand (gen_rtx_PLUS (mode, accum, tem), - add_target - ? add_target : accum_target); - val_so_far += val_so_far << log; - break; - - case alg_sub_factor: - tem = expand_shift (LSHIFT_EXPR, mode, accum, - build_int_2 (log, 0), NULL_RTX, 0); - accum = force_operand (gen_rtx_MINUS (mode, tem, accum), - (add_target ? add_target - : preserve ? 0 : tem)); - val_so_far = (val_so_far << log) - val_so_far; - break; - - default: - abort (); - } - - /* Write a REG_EQUAL note on the last insn so that we can cse - multiplication sequences. Note that if ACCUM is a SUBREG, - we've set the inner register and must properly indicate - that. */ - - tem = op0, nmode = mode; - if (GET_CODE (accum) == SUBREG) - { - nmode = GET_MODE (SUBREG_REG (accum)); - tem = gen_lowpart (nmode, op0); - } - - insn = get_last_insn (); - set_unique_reg_note (insn, - REG_EQUAL, - gen_rtx_MULT (nmode, tem, - GEN_INT (val_so_far))); - } + if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant, + mult_cost)) + return expand_mult_const (mode, op0, INTVAL (const_op1), target, + &algorithm, variant); + } - if (variant == negate_variant) - { - val_so_far = - val_so_far; - accum = expand_unop (mode, neg_optab, accum, target, 0); - } - else if (variant == add_variant) - { - val_so_far = val_so_far + 1; - accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target); - } + if (GET_CODE (op0) == CONST_DOUBLE) + { + rtx temp = op0; + op0 = op1; + op1 = temp; + } - if (val != val_so_far) - abort (); + /* Expand x*2.0 as x+x. 
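
   Illustration (editorial sketch, not part of the patch): doubling is exact
   in IEEE binary floating point, it only bumps the exponent, so the rewrite
   below is safe for every input, including zeros, subnormals and infinities:

     #include <assert.h>

     int main (void)
     {
       volatile double xs[] = { 0.0, -0.0, 1.5, -3.25, 1e-308, 1e308 };
       int i;
       for (i = 0; i < 6; i++)
         assert (xs[i] + xs[i] == xs[i] * 2.0);
       return 0;
     }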
*/ + if (GET_CODE (op1) == CONST_DOUBLE + && GET_MODE_CLASS (mode) == MODE_FLOAT) + { + REAL_VALUE_TYPE d; + REAL_VALUE_FROM_CONST_DOUBLE (d, op1); - return accum; + if (REAL_VALUES_EQUAL (d, dconst2)) + { + op0 = force_reg (GET_MODE (op0), op0); + return expand_binop (mode, add_optab, op0, op0, + target, unsignedp, OPTAB_LIB_WIDEN); } } @@ -2565,8 +2698,8 @@ expand_mult (mode, op0, op1, target, unsignedp) there is no difference between signed and unsigned. */ op0 = expand_binop (mode, ! unsignedp - && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT) - ? smulv_optab : smul_optab, + && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT) + ? smulv_optab : smul_optab, op0, op1, target, unsignedp, OPTAB_LIB_WIDEN); if (op0 == 0) abort (); @@ -2576,8 +2709,7 @@ expand_mult (mode, op0, op1, target, unsignedp) /* Return the smallest n such that 2**n >= X. */ int -ceil_log2 (x) - unsigned HOST_WIDE_INT x; +ceil_log2 (unsigned HOST_WIDE_INT x) { return floor_log2 (x - 1) + 1; } @@ -2600,13 +2732,9 @@ ceil_log2 (x) static unsigned HOST_WIDE_INT -choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr) - unsigned HOST_WIDE_INT d; - int n; - int precision; - unsigned HOST_WIDE_INT *multiplier_ptr; - int *post_shift_ptr; - int *lgup_ptr; +choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, + unsigned HOST_WIDE_INT *multiplier_ptr, + int *post_shift_ptr, int *lgup_ptr) { HOST_WIDE_INT mhigh_hi, mlow_hi; unsigned HOST_WIDE_INT mhigh_lo, mlow_lo; @@ -2657,14 +2785,14 @@ choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr) abort (); if (mhigh_hi > 1 || mlow_hi > 1) abort (); - /* assert that mlow < mhigh. */ + /* Assert that mlow < mhigh. */ if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo))) abort (); /* If precision == N, then mlow, mhigh exceed 2^N (but they do not exceed 2^(N+1)). */ - /* Reduce to lowest terms */ + /* Reduce to lowest terms. */ for (post_shift = lgup; post_shift > 0; post_shift--) { unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1); @@ -2697,9 +2825,7 @@ choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr) congruent to 1 (mod 2**N). */ static unsigned HOST_WIDE_INT -invert_mod2n (x, n) - unsigned HOST_WIDE_INT x; - int n; +invert_mod2n (unsigned HOST_WIDE_INT x, int n) { /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ @@ -2734,10 +2860,8 @@ invert_mod2n (x, n) MODE is the mode of operation. */ rtx -expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp) - enum machine_mode mode; - rtx adj_operand, op0, op1, target; - int unsignedp; +expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0, + rtx op1, rtx target, int unsignedp) { rtx tem; enum rtx_code adj_code = unsignedp ? PLUS : MINUS; @@ -2760,151 +2884,177 @@ expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp) return target; } -/* Emit code to multiply OP0 and CNST1, putting the high half of the result - in TARGET if that is convenient, and return where the result is. If the - operation can not be performed, 0 is returned. +/* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */ - MODE is the mode of operation and result. +static rtx +extract_high_half (enum machine_mode mode, rtx op) +{ + enum machine_mode wider_mode; - UNSIGNEDP nonzero means unsigned multiply. + if (mode == word_mode) + return gen_highpart (mode, op); - MAX_COST is the total allowed cost for the expanded RTL. 
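
   Illustration (editorial sketch, not part of the patch): the main consumer
   of this function is division by a constant, where choose_multiplier above
   supplies the magic value. For unsigned 32-bit division by 5 it yields
   ceil(2^34 / 5) = 0xCCCCCCCD with a post-shift of 2, so the quotient is
   just the high product shifted:

     #include <assert.h>
     #include <stdint.h>

     static uint32_t div5 (uint32_t x)
     {
       // mulhi(x, 0xCCCCCCCD) >> 2, folded into one 64-bit shift.
       return (uint32_t) (((uint64_t) x * 0xCCCCCCCDu) >> 34);
     }

     int main (void)
     {
       uint32_t samples[] = { 0, 1, 4, 5, 6, 123456789u, 0xffffffffu };
       int i;
       for (i = 0; i < 7; i++)
         assert (div5 (samples[i]) == samples[i] / 5);
       return 0;
     }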
*/ + wider_mode = GET_MODE_WIDER_MODE (mode); + op = expand_shift (RSHIFT_EXPR, wider_mode, op, + build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1); + return convert_modes (mode, wider_mode, op, 0); +} -rtx -expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost) - enum machine_mode mode; - rtx op0, target; - unsigned HOST_WIDE_INT cnst1; - int unsignedp; - int max_cost; +/* Like expand_mult_highpart, but only consider using a multiplication + optab. OP1 is an rtx for the constant operand. */ + +static rtx +expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, + rtx target, int unsignedp, int max_cost) { - enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); - optab mul_highpart_optab; + rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); + enum machine_mode wider_mode; optab moptab; rtx tem; - int size = GET_MODE_BITSIZE (mode); - rtx op1, wide_op1; - - /* We can't support modes wider than HOST_BITS_PER_INT. */ - if (size > HOST_BITS_PER_WIDE_INT) - abort (); - - op1 = gen_int_mode (cnst1, mode); - - wide_op1 - = immed_double_const (cnst1, - (unsignedp - ? (HOST_WIDE_INT) 0 - : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))), - wider_mode); - - /* expand_mult handles constant multiplication of word_mode - or narrower. It does a poor job for large modes. */ - if (size < BITS_PER_WORD - && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost) - { - /* We have to do this, since expand_binop doesn't do conversion for - multiply. Maybe change expand_binop to handle widening multiply? */ - op0 = convert_to_mode (wider_mode, op0, unsignedp); - - /* We know that this can't have signed overflow, so pretend this is - an unsigned multiply. */ - tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0); - tem = expand_shift (RSHIFT_EXPR, wider_mode, tem, - build_int_2 (size, 0), NULL_RTX, 1); - return convert_modes (mode, wider_mode, tem, unsignedp); - } + int size; - if (target == 0) - target = gen_reg_rtx (mode); + wider_mode = GET_MODE_WIDER_MODE (mode); + size = GET_MODE_BITSIZE (mode); /* Firstly, try using a multiplication insn that only generates the needed high part of the product, and in the sign flavor of unsignedp. */ - if (mul_highpart_cost[(int) mode] < max_cost) + if (mul_highpart_cost[mode] < max_cost) { - mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab; - target = expand_binop (mode, mul_highpart_optab, - op0, op1, target, unsignedp, OPTAB_DIRECT); - if (target) - return target; + moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab; + tem = expand_binop (mode, moptab, op0, narrow_op1, target, + unsignedp, OPTAB_DIRECT); + if (tem) + return tem; } /* Secondly, same as above, but use sign flavor opposite of unsignedp. Need to adjust the result after the multiplication. */ if (size - 1 < BITS_PER_WORD - && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost - < max_cost)) + && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1] + + 4 * add_cost[mode] < max_cost)) { - mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab; - target = expand_binop (mode, mul_highpart_optab, - op0, op1, target, unsignedp, OPTAB_DIRECT); - if (target) + moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; + tem = expand_binop (mode, moptab, op0, narrow_op1, target, + unsignedp, OPTAB_DIRECT); + if (tem) /* We used the wrong signedness. Adjust the result. 
*/
- return expand_mult_highpart_adjust (mode, target, op0,
- op1, target, unsignedp);
+ return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
+ tem, unsignedp);
}

/* Try widening multiplication. */
moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
- if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
- && mul_widen_cost[(int) wider_mode] < max_cost)
+ if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
+ && mul_widen_cost[wider_mode] < max_cost)
{
- op1 = force_reg (mode, op1);
- goto try;
+ tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
+ unsignedp, OPTAB_WIDEN);
+ if (tem)
+ return extract_high_half (mode, tem);
}

/* Try widening the mode and perform a non-widening multiplication. */
moptab = smul_optab;
- if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+ if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
- && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
+ && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
{
- op1 = wide_op1;
- goto try;
+ tem = expand_binop (wider_mode, moptab, op0, op1, 0,
+ unsignedp, OPTAB_WIDEN);
+ if (tem)
+ return extract_high_half (mode, tem);
}

/* Try widening multiplication of opposite signedness, and adjust. */
moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
- if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
+ if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
- && (mul_widen_cost[(int) wider_mode]
- + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
+ && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
+ + 4 * add_cost[mode] < max_cost))
{
- rtx regop1 = force_reg (mode, op1);
- tem = expand_binop (wider_mode, moptab, op0, regop1,
+ tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
NULL_RTX, ! unsignedp, OPTAB_WIDEN);
if (tem != 0)
{
- /* Extract the high half of the just generated product. */
- tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
- build_int_2 (size, 0), NULL_RTX, 1);
- tem = convert_modes (mode, wider_mode, tem, unsignedp);
+ tem = extract_high_half (mode, tem);
/* We used the wrong signedness. Adjust the result. */
return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
target, unsignedp);
}
}

return 0;
+}

- try:
- /* Pass NULL_RTX as target since TARGET has wrong mode. */
- tem = expand_binop (wider_mode, moptab, op0, op1,
- NULL_RTX, unsignedp, OPTAB_WIDEN);
- if (tem == 0)
- return 0;
+/* Emit code to multiply OP0 and CNST1, putting the high half of the result
+ in TARGET if that is convenient, and return where the result is. If the
+ operation cannot be performed, 0 is returned.

- /* Extract the high half of the just generated product. */
- if (mode == word_mode)
+ MODE is the mode of operation and result.
+
+ UNSIGNEDP nonzero means unsigned multiply.
+
+ MAX_COST is the total allowed cost for the expanded RTL. */
+
+rtx
+expand_mult_highpart (enum machine_mode mode, rtx op0,
+ unsigned HOST_WIDE_INT cnst1, rtx target,
+ int unsignedp, int max_cost)
+{
+ enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
+ int extra_cost;
+ bool sign_adjust = false;
+ enum mult_variant variant;
+ struct algorithm alg;
+ rtx op1, tem;
+
+ /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. 
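
Aside (illustrative, not part of the patch): expand_mult_highpart is the engine behind the divide-by-constant paths of expand_divmod below. Spelled out in plain C for one case: choose_multiplier, asked about a 32-bit unsigned division by 5, yields multiplier 0xCCCCCCCD (the rounded-up approximation of 2^34/5) and post_shift 2, so the quotient comes from one high-part multiply and one shift.

#include <stdint.h>
#include <assert.h>

/* Unsigned division by the constant 5 with no divide instruction:
   take the high word of the 64-bit product with the precomputed
   multiplier, then shift right by post_shift.  */
static uint32_t
udiv5 (uint32_t x)
{
  uint32_t hi = (uint32_t) (((uint64_t) x * 0xCCCCCCCDu) >> 32);

  return hi >> 2;
}

int
main (void)
{
  uint32_t x;

  assert (udiv5 (7) == 1);
  assert (udiv5 (0xFFFFFFFFu) == 0xFFFFFFFFu / 5);
  for (x = 0; x < 1000000; x++)
    assert (udiv5 (x) == x / 5);
  return 0;
}
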
*/ + if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT) + abort (); + + op1 = gen_int_mode (cnst1, wider_mode); + cnst1 &= GET_MODE_MASK (mode); + + /* We can't optimize modes wider than BITS_PER_WORD. + ??? We might be able to perform double-word arithmetic if + mode == word_mode, however all the cost calculations in + synth_mult etc. assume single-word operations. */ + if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD) + return expand_mult_highpart_optab (mode, op0, op1, target, + unsignedp, max_cost); + + extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1]; + + /* Check whether we try to multiply by a negative constant. */ + if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1)) { - return gen_highpart (mode, tem); + sign_adjust = true; + extra_cost += add_cost[mode]; } - else + + /* See whether shift/add multiplication is cheap enough. */ + if (choose_mult_variant (wider_mode, cnst1, &alg, &variant, + max_cost - extra_cost)) { - tem = expand_shift (RSHIFT_EXPR, wider_mode, tem, - build_int_2 (size, 0), NULL_RTX, 1); - return convert_modes (mode, wider_mode, tem, unsignedp); + /* See whether the specialized multiplication optabs are + cheaper than the shift/add version. */ + tem = expand_mult_highpart_optab (mode, op0, op1, target, + unsignedp, alg.cost + extra_cost); + if (tem) + return tem; + + tem = convert_to_mode (wider_mode, op0, unsignedp); + tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant); + tem = extract_high_half (mode, tem); + + /* Adjust result for signedness. */ + if (sign_adjust) + tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem); + + return tem; } + return expand_mult_highpart_optab (mode, op0, op1, target, + unsignedp, max_cost); } /* Emit the code to divide OP0 by OP1, putting the result in TARGET @@ -2931,7 +3081,7 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost) the result is exact for inputs up to 0x1fffffff. The input range can be reduced by using cross-sum rules. For odd divisors >= 3, the following table gives right shift counts - so that if an number is shifted by an integer multiple of the given + so that if a number is shifted by an integer multiple of the given amount, the remainder stays the same: 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, @@ -2948,12 +3098,8 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost) #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0) rtx -expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) - int rem_flag; - enum tree_code code; - enum machine_mode mode; - rtx op0, op1, target; - int unsignedp; +expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, + rtx op0, rtx op1, rtx target, int unsignedp) { enum machine_mode compute_mode; rtx tquotient; @@ -2962,14 +3108,20 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) int size; rtx insn, set; optab optab1, optab2; - int op1_is_constant, op1_is_pow2; + int op1_is_constant, op1_is_pow2 = 0; int max_cost, extra_cost; static HOST_WIDE_INT last_div_const = 0; + static HOST_WIDE_INT ext_op1; op1_is_constant = GET_CODE (op1) == CONST_INT; - op1_is_pow2 = (op1_is_constant - && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) - || (! 
unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))); + if (op1_is_constant) + { + ext_op1 = INTVAL (op1); + if (unsignedp) + ext_op1 &= GET_MODE_MASK (mode); + op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1) + || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1)))); + } /* This is the structure of expand_divmod: @@ -3011,9 +3163,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) if (! unsignedp && op1 == constm1_rtx) { if (rem_flag) - return const0_rtx; + return const0_rtx; return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT - ? negv_optab : neg_optab, op0, target, 0); + ? negv_optab : neg_optab, op0, target, 0); } if (target @@ -3049,21 +3201,24 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) not straightforward to generalize this. Maybe we should make an array of possible modes in init_expmed? Save this for GCC 2.7. */ - optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab) + optab1 = ((op1_is_pow2 && op1 != const0_rtx) + ? (unsignedp ? lshr_optab : ashr_optab) : (unsignedp ? udiv_optab : sdiv_optab)); - optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab)); + optab2 = ((op1_is_pow2 && op1 != const0_rtx) + ? optab1 + : (unsignedp ? udivmod_optab : sdivmod_optab)); for (compute_mode = mode; compute_mode != VOIDmode; compute_mode = GET_MODE_WIDER_MODE (compute_mode)) - if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing - || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing) + if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing + || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing) break; if (compute_mode == VOIDmode) for (compute_mode = mode; compute_mode != VOIDmode; compute_mode = GET_MODE_WIDER_MODE (compute_mode)) - if (optab1->handlers[(int) compute_mode].libfunc - || optab2->handlers[(int) compute_mode].libfunc) + if (optab1->handlers[compute_mode].libfunc + || optab2->handlers[compute_mode].libfunc) break; /* If we still couldn't find a mode, use MODE, but we'll probably abort @@ -3087,10 +3242,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) /* Only deduct something for a REM if the last divide done was for a different constant. Then set the constant of the last divide. */ - max_cost = div_cost[(int) compute_mode] + max_cost = div_cost[compute_mode] - (rem_flag && ! (last_div_const != 0 && op1_is_constant && INTVAL (op1) == last_div_const) - ? mul_cost[(int) compute_mode] + add_cost : 0); + ? mul_cost[compute_mode] + add_cost[compute_mode] + : 0); last_div_const = ! rem_flag && op1_is_constant ? 
INTVAL (op1) : 0; @@ -3146,7 +3302,8 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) unsigned HOST_WIDE_INT mh, ml; int pre_shift, post_shift; int dummy; - unsigned HOST_WIDE_INT d = INTVAL (op1); + unsigned HOST_WIDE_INT d = (INTVAL (op1) + & GET_MODE_MASK (compute_mode)); if (EXACT_POWER_OF_2_OR_ZERO_P (d)) { @@ -3206,8 +3363,10 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) if (post_shift - 1 >= BITS_PER_WORD) goto fail1; - extra_cost = (shift_cost[post_shift - 1] - + shift_cost[1] + 2 * add_cost); + extra_cost + = (shift_cost[compute_mode][post_shift - 1] + + shift_cost[compute_mode][1] + + 2 * add_cost[compute_mode]); t1 = expand_mult_highpart (compute_mode, op0, ml, NULL_RTX, 1, max_cost - extra_cost); @@ -3237,8 +3396,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, build_int_2 (pre_shift, 0), NULL_RTX, 1); - extra_cost = (shift_cost[pre_shift] - + shift_cost[post_shift]); + extra_cost + = (shift_cost[compute_mode][pre_shift] + + shift_cost[compute_mode][post_shift]); t2 = expand_mult_highpart (compute_mode, t1, ml, NULL_RTX, 1, max_cost - extra_cost); @@ -3259,7 +3419,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) && (set = single_set (insn)) != 0 && SET_DEST (set) == quotient) set_unique_reg_note (insn, - REG_EQUAL, + REG_EQUAL, gen_rtx_UDIV (compute_mode, op0, op1)); } else /* TRUNC_DIV, signed */ @@ -3290,15 +3450,16 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) goto fail1; } else if (EXACT_POWER_OF_2_OR_ZERO_P (d) - && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap) + && (rem_flag ? smod_pow2_cheap[compute_mode] + : sdiv_pow2_cheap[compute_mode]) /* ??? The cheap metric is computed only for word_mode. If this operation is wider, this may not be so. Assume true if the optab has an expander for this mode. */ && (((rem_flag ? 
smod_optab : sdiv_optab) - ->handlers[(int) compute_mode].insn_code + ->handlers[compute_mode].insn_code != CODE_FOR_nothing) - || (sdivmod_optab->handlers[(int) compute_mode] + || (sdivmod_optab->handlers[compute_mode] .insn_code != CODE_FOR_nothing))) ; else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) @@ -3347,7 +3508,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) && abs_d < ((unsigned HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1))) set_unique_reg_note (insn, - REG_EQUAL, + REG_EQUAL, gen_rtx_DIV (compute_mode, op0, GEN_INT @@ -3371,8 +3532,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) || size - 1 >= BITS_PER_WORD) goto fail1; - extra_cost = (shift_cost[post_shift] - + shift_cost[size - 1] + add_cost); + extra_cost = (shift_cost[compute_mode][post_shift] + + shift_cost[compute_mode][size - 1] + + add_cost[compute_mode]); t1 = expand_mult_highpart (compute_mode, op0, ml, NULL_RTX, 0, max_cost - extra_cost); @@ -3402,8 +3564,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) goto fail1; ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); - extra_cost = (shift_cost[post_shift] - + shift_cost[size - 1] + 2 * add_cost); + extra_cost = (shift_cost[compute_mode][post_shift] + + shift_cost[compute_mode][size - 1] + + 2 * add_cost[compute_mode]); t1 = expand_mult_highpart (compute_mode, op0, ml, NULL_RTX, 0, max_cost - extra_cost); @@ -3438,7 +3601,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) && (set = single_set (insn)) != 0 && SET_DEST (set) == quotient) set_unique_reg_note (insn, - REG_EQUAL, + REG_EQUAL, gen_rtx_DIV (compute_mode, op0, op1)); } break; @@ -3492,8 +3655,9 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) NULL_RTX, 0); t2 = expand_binop (compute_mode, xor_optab, op0, t1, NULL_RTX, 0, OPTAB_WIDEN); - extra_cost = (shift_cost[post_shift] - + shift_cost[size - 1] + 2 * add_cost); + extra_cost = (shift_cost[compute_mode][post_shift] + + shift_cost[compute_mode][size - 1] + + 2 * add_cost[compute_mode]); t3 = expand_mult_highpart (compute_mode, t2, ml, NULL_RTX, 1, max_cost - extra_cost); @@ -3549,13 +3713,13 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) if (rem_flag) { remainder - = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode); + = REG_P (target) ? target : gen_reg_rtx (compute_mode); quotient = gen_reg_rtx (compute_mode); } else { quotient - = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode); + = REG_P (target) ? target : gen_reg_rtx (compute_mode); remainder = gen_reg_rtx (compute_mode); } @@ -3665,13 +3829,13 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) if (rem_flag) { - remainder = (GET_CODE (target) == REG + remainder = (REG_P (target) ? target : gen_reg_rtx (compute_mode)); quotient = gen_reg_rtx (compute_mode); } else { - quotient = (GET_CODE (target) == REG + quotient = (REG_P (target) ? target : gen_reg_rtx (compute_mode)); remainder = gen_reg_rtx (compute_mode); } @@ -3762,13 +3926,13 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) target = gen_reg_rtx (compute_mode); if (rem_flag) { - remainder= (GET_CODE (target) == REG + remainder= (REG_P (target) ? target : gen_reg_rtx (compute_mode)); quotient = gen_reg_rtx (compute_mode); } else { - quotient = (GET_CODE (target) == REG + quotient = (REG_P (target) ? 
target : gen_reg_rtx (compute_mode)); remainder = gen_reg_rtx (compute_mode); } @@ -3854,11 +4018,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) build_int_2 (pre_shift, 0), NULL_RTX, unsignedp); quotient = expand_mult (compute_mode, t1, gen_int_mode (ml, compute_mode), - NULL_RTX, 0); + NULL_RTX, 1); insn = get_last_insn (); set_unique_reg_note (insn, - REG_EQUAL, + REG_EQUAL, gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, compute_mode, op0, op1)); @@ -3943,7 +4107,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) { /* Try to produce the remainder without producing the quotient. If we seem to have a divmod pattern that does not require widening, - don't try widening here. We should really have an WIDEN argument + don't try widening here. We should really have a WIDEN argument to expand_twoval_binop, since what we'd really like to do here is 1) try a mod insn in compute_mode 2) try a divmod insn in compute_mode @@ -3954,7 +4118,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) = sign_expand_binop (compute_mode, umod_optab, smod_optab, op0, op1, target, unsignedp, - ((optab2->handlers[(int) compute_mode].insn_code + ((optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing) ? OPTAB_DIRECT : OPTAB_WIDEN)); if (remainder == 0) @@ -3982,7 +4146,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, op0, op1, rem_flag ? NULL_RTX : target, unsignedp, - ((optab2->handlers[(int) compute_mode].insn_code + ((optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing) ? OPTAB_DIRECT : OPTAB_WIDEN)); @@ -4037,9 +4201,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) generated by loop.c. */ tree -make_tree (type, x) - tree type; - rtx x; +make_tree (tree type, rtx x) { tree t; @@ -4047,8 +4209,9 @@ make_tree (type, x) { case CONST_INT: t = build_int_2 (INTVAL (x), - (TREE_UNSIGNED (type) - && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT)) + (TYPE_UNSIGNED (type) + && (GET_MODE_BITSIZE (TYPE_MODE (type)) + < HOST_BITS_PER_WIDE_INT)) || INTVAL (x) >= 0 ? 
0 : -1); TREE_TYPE (t) = type; return t; @@ -4107,21 +4270,22 @@ make_tree (type, x) make_tree (type, XEXP (x, 1)))); case LSHIFTRT: + t = lang_hooks.types.unsigned_type (type); return fold (convert (type, - build (RSHIFT_EXPR, unsigned_type (type), - make_tree (unsigned_type (type), - XEXP (x, 0)), + build (RSHIFT_EXPR, t, + make_tree (t, XEXP (x, 0)), make_tree (type, XEXP (x, 1))))); case ASHIFTRT: + t = lang_hooks.types.signed_type (type); return fold (convert (type, - build (RSHIFT_EXPR, signed_type (type), - make_tree (signed_type (type), XEXP (x, 0)), + build (RSHIFT_EXPR, t, + make_tree (t, XEXP (x, 0)), make_tree (type, XEXP (x, 1))))); case DIV: if (TREE_CODE (type) != REAL_TYPE) - t = signed_type (type); + t = lang_hooks.types.signed_type (type); else t = type; @@ -4130,21 +4294,26 @@ make_tree (type, x) make_tree (t, XEXP (x, 0)), make_tree (t, XEXP (x, 1))))); case UDIV: - t = unsigned_type (type); + t = lang_hooks.types.unsigned_type (type); return fold (convert (type, build (TRUNC_DIV_EXPR, t, make_tree (t, XEXP (x, 0)), make_tree (t, XEXP (x, 1))))); + + case SIGN_EXTEND: + case ZERO_EXTEND: + t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)), + GET_CODE (x) == ZERO_EXTEND); + return fold (convert (type, make_tree (t, XEXP (x, 0)))); + default: t = make_node (RTL_EXPR); TREE_TYPE (t) = type; -#ifdef POINTERS_EXTEND_UNSIGNED /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being ptr_mode. So convert. */ - if (POINTER_TYPE_P (type) && GET_MODE (x) != TYPE_MODE (type)) + if (POINTER_TYPE_P (type)) x = convert_memory_address (TYPE_MODE (type), x); -#endif RTL_EXPR_RTL (t) = x; /* There are no insns to be output @@ -4154,23 +4323,57 @@ make_tree (type, x) } } +/* Check whether the multiplication X * MULT + ADD overflows. + X, MULT and ADD must be CONST_*. + MODE is the machine mode for the computation. + X and MULT must have mode MODE. ADD may have a different mode. + So can X (defaults to same as MODE). + UNSIGNEDP is nonzero to do unsigned multiplication. */ + +bool +const_mult_add_overflow_p (rtx x, rtx mult, rtx add, enum machine_mode mode, int unsignedp) +{ + tree type, mult_type, add_type, result; + + type = lang_hooks.types.type_for_mode (mode, unsignedp); + + /* In order to get a proper overflow indication from an unsigned + type, we have to pretend that it's a sizetype. */ + mult_type = type; + if (unsignedp) + { + mult_type = copy_node (type); + TYPE_IS_SIZETYPE (mult_type) = 1; + } + + add_type = (GET_MODE (add) == VOIDmode ? mult_type + : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp)); + + result = fold (build (PLUS_EXPR, mult_type, + fold (build (MULT_EXPR, mult_type, + make_tree (mult_type, x), + make_tree (mult_type, mult))), + make_tree (add_type, add))); + + return TREE_CONSTANT_OVERFLOW (result); +} + /* Return an rtx representing the value of X * MULT + ADD. TARGET is a suggestion for where to store the result (an rtx). MODE is the machine mode for the computation. X and MULT must have mode MODE. ADD may have a different mode. So can X (defaults to same as MODE). - UNSIGNEDP is non-zero to do unsigned multiplication. + UNSIGNEDP is nonzero to do unsigned multiplication. This may emit insns. 
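
Aside (illustrative, not part of the patch): const_mult_add_overflow_p above folds X * MULT + ADD on tree constants and then simply reads TREE_CONSTANT_OVERFLOW off the result. The division-based tests below answer the same question at run time for 64-bit signed values without ever evaluating an overflowing expression; the function name and the fixed width are invented for the example.

#include <stdint.h>
#include <assert.h>

/* Return 1 if X * MULT + ADD overflows the int64_t range.  */
static int
smul_add_overflows (int64_t x, int64_t mult, int64_t add)
{
  int64_t prod;

  /* Would X * MULT overflow?  Each test divides the threatened bound
     by one factor instead of computing the product.  */
  if (x > 0 && mult > 0 && x > INT64_MAX / mult)
    return 1;
  if (x > 0 && mult < 0 && mult < INT64_MIN / x)
    return 1;
  if (x < 0 && mult > 0 && x < INT64_MIN / mult)
    return 1;
  if (x < 0 && mult < 0 && mult < INT64_MAX / x)
    return 1;
  prod = x * mult;

  /* Would PROD + ADD overflow?  */
  if (add > 0 && prod > INT64_MAX - add)
    return 1;
  if (add < 0 && prod < INT64_MIN - add)
    return 1;
  return 0;
}

int
main (void)
{
  assert (smul_add_overflows (INT64_MAX / 2, 2, 2));
  assert (!smul_add_overflows (1000, 1000, 1000));
  return 0;
}
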
*/ rtx -expand_mult_add (x, target, mult, add, mode, unsignedp) - rtx x, target, mult, add; - enum machine_mode mode; - int unsignedp; +expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode, + int unsignedp) { - tree type = type_for_mode (mode, unsignedp); + tree type = lang_hooks.types.type_for_mode (mode, unsignedp); tree add_type = (GET_MODE (add) == VOIDmode - ? type : type_for_mode (GET_MODE (add), unsignedp)); + ? type: lang_hooks.types.type_for_mode (GET_MODE (add), + unsignedp)); tree result = fold (build (PLUS_EXPR, type, fold (build (MULT_EXPR, type, make_tree (type, x), @@ -4186,9 +4389,7 @@ expand_mult_add (x, target, mult, add, mode, unsignedp) If TARGET is 0, a pseudo-register or constant is returned. */ rtx -expand_and (mode, op0, op1, target) - enum machine_mode mode; - rtx op0, op1, target; +expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target) { rtx tem = 0; @@ -4220,13 +4421,8 @@ expand_and (mode, op0, op1, target) "raw" out of the scc insn. */ rtx -emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) - rtx target; - enum rtx_code code; - rtx op0, op1; - enum machine_mode mode; - int unsignedp; - int normalizep; +emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, + enum machine_mode mode, int unsignedp, int normalizep) { rtx subtarget; enum insn_code icode; @@ -4300,19 +4496,27 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) { if (code == EQ || code == NE) { + rtx op00, op01, op0both; + /* Do a logical OR of the two words and compare the result. */ - rtx op0h = gen_highpart (word_mode, op0); - rtx op0l = gen_lowpart (word_mode, op0); - rtx op0both = expand_binop (word_mode, ior_optab, op0h, op0l, - NULL_RTX, unsignedp, OPTAB_DIRECT); + op00 = simplify_gen_subreg (word_mode, op0, mode, 0); + op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD); + op0both = expand_binop (word_mode, ior_optab, op00, op01, + NULL_RTX, unsignedp, OPTAB_DIRECT); if (op0both != 0) return emit_store_flag (target, code, op0both, op1, word_mode, unsignedp, normalizep); } else if (code == LT || code == GE) - /* If testing the sign bit, can just test on high word. */ - return emit_store_flag (target, code, gen_highpart (word_mode, op0), - op1, word_mode, unsignedp, normalizep); + { + rtx op0h; + + /* If testing the sign bit, can just test on high word. */ + op0h = simplify_gen_subreg (word_mode, op0, mode, + subreg_highpart_offset (word_mode, mode)); + return emit_store_flag (target, code, op0h, op1, word_mode, + unsignedp, normalizep); + } } /* From now on, we won't change CODE, so set ICODE now. */ @@ -4325,7 +4529,7 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) && (normalizep || STORE_FLAG_VALUE == 1 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode)) - == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))) + == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))) { subtarget = target; @@ -4378,11 +4582,28 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) comparison = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX); - if (GET_CODE (comparison) == CONST_INT) - return (comparison == const0_rtx ? const0_rtx - : normalizep == 1 ? const1_rtx - : normalizep == -1 ? 
constm1_rtx - : const_true_rtx); + if (CONSTANT_P (comparison)) + { + if (GET_CODE (comparison) == CONST_INT) + { + if (comparison == const0_rtx) + return const0_rtx; + } +#ifdef FLOAT_STORE_FLAG_VALUE + else if (GET_CODE (comparison) == CONST_DOUBLE) + { + if (comparison == CONST0_RTX (GET_MODE (comparison))) + return const0_rtx; + } +#endif + else + abort (); + if (normalizep == 1) + return const1_rtx; + if (normalizep == -1) + return constm1_rtx; + return const_true_rtx; + } /* The code of COMPARISON may not match CODE if compare_from_rtx decided to swap its operands and reverse the original code. @@ -4572,7 +4793,7 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) if (code == EQ || code == NE) { /* For EQ or NE, one way to do the comparison is to apply an operation - that converts the operand into a positive number if it is non-zero + that converts the operand into a positive number if it is nonzero or zero if it was originally zero. Then, for EQ, we subtract 1 and for NE we negate. This puts the result in the sign bit. Then we normalize with a shift, if needed. @@ -4586,9 +4807,9 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) that is compensated by the subsequent overflow when subtracting one / negating. */ - if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing) + if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing) tem = expand_unop (mode, abs_optab, op0, subtarget, 1); - else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing) + else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing) tem = expand_unop (mode, ffs_optab, op0, subtarget, 1); else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD) { @@ -4652,13 +4873,8 @@ emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep) /* Like emit_store_flag, but always succeeds. */ rtx -emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep) - rtx target; - enum rtx_code code; - rtx op0, op1; - enum machine_mode mode; - int unsignedp; - int normalizep; +emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1, + enum machine_mode mode, int unsignedp, int normalizep) { rtx tem, label; @@ -4672,7 +4888,7 @@ emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep) /* If this failed, we have to do this with set/compare/jump/set code. */ - if (GET_CODE (target) != REG + if (!REG_P (target) || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1)) target = gen_reg_rtx (GET_MODE (target)); @@ -4697,10 +4913,8 @@ emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep) be handled if needed). */ static void -do_cmp_and_jump (arg1, arg2, op, mode, label) - rtx arg1, arg2, label; - enum rtx_code op; - enum machine_mode mode; +do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode, + rtx label) { /* If this mode is an integer too wide to compare properly, compare word by word. Rely on cse to optimize constant cases. */
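
Aside (illustrative, not part of the patch): the double-word strategies used above can be written out for a 64-bit value split into two 32-bit words (w[0] low, w[1] high; two's complement assumed). The first helper mirrors the OR-the-words trick emit_store_flag uses for double-word EQ/NE (valid when the comparison is against zero), the second its sign-bit test on the high word, and the third the high-word-then-low-word shape of a word-by-word comparison.

#include <stdint.h>
#include <assert.h>

/* EQ against zero: OR the two words together and test once.  */
static int
dword_is_zero (const uint32_t w[2])
{
  return (w[0] | w[1]) == 0;
}

/* Signed "< 0": only the sign bit of the high word matters.  */
static int
dword_is_negative (const uint32_t w[2])
{
  return (int32_t) w[1] < 0;
}

/* Signed A < B: decide on the high words (signed) and fall back to
   an unsigned comparison of the low words only when they tie.  */
static int
dword_less_than (const uint32_t a[2], const uint32_t b[2])
{
  if ((int32_t) a[1] != (int32_t) b[1])
    return (int32_t) a[1] < (int32_t) b[1];
  return a[0] < b[0];
}

int
main (void)
{
  uint32_t a[2] = { 5, 0xFFFFFFFFu };	/* -4294967291 */
  uint32_t b[2] = { 0, 0 };		/* 0 */

  assert (!dword_is_zero (a) && dword_is_zero (b));
  assert (dword_is_negative (a) && !dword_is_negative (b));
  assert (dword_less_than (a, b) && !dword_less_than (b, a));
  return 0;
}
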