/* Medium-level subroutines: convert bit-field store and extract
and shifts, multiplies and divides to rtl instructions.
Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
- 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+ 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of GCC.
value = protect_from_queue (value, 0);
+  /* Use vec_set patterns for inserting parts of vectors whenever
+     available.  The field must be exactly one (aligned) element of the
+     vector for the pattern to apply.  */
+  if (VECTOR_MODE_P (GET_MODE (op0))
+      && GET_CODE (op0) != MEM
+      && (vec_set_optab->handlers[(int)GET_MODE (op0)].insn_code
+	  != CODE_FOR_nothing)
+      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
+      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
+      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
+    {
+      enum machine_mode outermode = GET_MODE (op0);
+      enum machine_mode innermode = GET_MODE_INNER (outermode);
+      int icode = (int) vec_set_optab->handlers[(int) outermode].insn_code;
+      /* Element index within the vector.  */
+      int pos = bitnum / GET_MODE_BITSIZE (innermode);
+      rtx rtxpos = GEN_INT (pos);
+      rtx src = value;
+      rtx dest = op0;
+      rtx pat, seq;
+      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
+      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
+      enum machine_mode mode2 = insn_data[icode].operand[2].mode;
+
+      start_sequence ();
+
+      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
+	src = copy_to_mode_reg (mode1, src);
+
+      /* The position operand must be forced into its own mode (mode2,
+	 not mode1) or the re-check below would abort.  */
+      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
+	rtxpos = copy_to_mode_reg (mode2, rtxpos);
+
+      /* We could handle this, but we should always be called with a pseudo
+	 for our targets and all insns should take them as outputs.  */
+      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
+	  || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
+	  || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
+	abort ();
+      pat = GEN_FCN (icode) (dest, src, rtxpos);
+      seq = get_insns ();
+      end_sequence ();
+      if (pat)
+	{
+	  emit_insn (seq);
+	  emit_insn (pat);
+	  return dest;
+	}
+    }
+
if (flag_force_mem)
{
int old_generating_concat_p = generating_concat_p;
VOIDmode, because that is what store_field uses to indicate that this
is a bit field, but passing VOIDmode to operand_subword_force will
result in an abort. */
- fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
+ fieldmode = GET_MODE (value);
+ if (fieldmode == VOIDmode)
+ fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
for (i = 0; i < nwords; i++)
{
store_bit_field (op0, MIN (BITS_PER_WORD,
bitsize - i * BITS_PER_WORD),
bitnum + bit_offset, word_mode,
- operand_subword_force (value, wordnum,
- (GET_MODE (value) == VOIDmode
- ? fieldmode
- : GET_MODE (value))),
+ operand_subword_force (value, wordnum, fieldmode),
total_size);
}
return value;
return op0;
}
+  /* Use vec_extract patterns for extracting parts of vectors whenever
+     available.  The field must not straddle a vector-element boundary.  */
+  if (VECTOR_MODE_P (GET_MODE (op0))
+      && GET_CODE (op0) != MEM
+      && (vec_extract_optab->handlers[(int)GET_MODE (op0)].insn_code
+	  != CODE_FOR_nothing)
+      && ((bitsize + bitnum) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
+	  == bitsize / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
+    {
+      enum machine_mode outermode = GET_MODE (op0);
+      enum machine_mode innermode = GET_MODE_INNER (outermode);
+      int icode = (int) vec_extract_optab->handlers[(int) outermode].insn_code;
+      /* Element index within the vector.  */
+      int pos = bitnum / GET_MODE_BITSIZE (innermode);
+      rtx rtxpos = GEN_INT (pos);
+      rtx src = op0;
+      rtx dest = NULL, pat, seq;
+      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
+      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
+      enum machine_mode mode2 = insn_data[icode].operand[2].mode;
+
+      if (innermode == tmode || innermode == mode)
+	dest = target;
+
+      if (!dest)
+	dest = gen_reg_rtx (innermode);
+
+      start_sequence ();
+
+      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
+	dest = copy_to_mode_reg (mode0, dest);
+
+      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
+	src = copy_to_mode_reg (mode1, src);
+
+      /* The position operand must be forced into its own mode (mode2,
+	 not mode1) or the re-check below would abort.  */
+      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
+	rtxpos = copy_to_mode_reg (mode2, rtxpos);
+
+      /* We could handle this, but we should always be called with a pseudo
+	 for our targets and all insns should take them as outputs.  */
+      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
+	  || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
+	  || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
+	abort ();
+      pat = GEN_FCN (icode) (dest, src, rtxpos);
+      seq = get_insns ();
+      end_sequence ();
+      if (pat)
+	{
+	  emit_insn (seq);
+	  emit_insn (pat);
+	  /* Recurse to pull the requested bits out of the extracted
+	     element; BITNUM is rebased to the start of that element.  */
+	  return extract_bit_field (dest, bitsize,
+				    bitnum - pos * GET_MODE_BITSIZE (innermode),
+				    unsignedp, target, mode, tmode, total_size);
+	}
+    }
+
/* Make sure we are playing with integral modes. Pun with subregs
if we aren't. */
{
If that's wrong, the solution is to test for it and set TARGET to 0
if needed. */
- mode1 = (VECTOR_MODE_P (tmode)
- ? mode
- : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0));
+ /* Only scalar integer modes can be converted via subregs. There is an
+ additional problem for FP modes here in that they can have a precision
+ which is different from the size. mode_for_size uses precision, but
+ we want a mode based on the size, so we must avoid calling it for FP
+ modes. */
+ mode1 = (SCALAR_INT_MODE_P (tmode)
+ ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
+ : mode);
if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
&& bitpos % BITS_PER_WORD == 0)
- || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
+ || (mode1 != BLKmode
/* ??? The big endian test here is wrong. This is correct
if the value is in a register, and if mode_for_size is not
the same mode as op0. This causes us to get unnecessarily
op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
-#ifdef SHIFT_COUNT_TRUNCATED
if (SHIFT_COUNT_TRUNCATED)
{
if (GET_CODE (op1) == CONST_INT
&& subreg_lowpart_p (op1))
op1 = SUBREG_REG (op1);
}
-#endif
if (op1 == const0_rtx)
return shifted;
char log[MAX_BITS_PER_WORD];
};
+/* Indicates the type of fixup needed after a constant multiplication.
+ BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
+ the result should be negated, and ADD_VARIANT means that the
+ multiplicand should be added to the result. */
+enum mult_variant {basic_variant, negate_variant, add_variant};
+
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
+static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
+ struct algorithm *, enum mult_variant *, int);
+static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
+ const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
int, unsigned HOST_WIDE_INT *,
int *, int *);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
+static rtx extract_high_half (enum machine_mode, rtx);
+static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
+ int, int);
/* Compute and return the best algorithm for multiplying by T.
The algorithm must cost less than cost_limit
If retval.cost >= COST_LIMIT, no algorithm was found and all
/* We'll be needing a couple extra algorithm structures now. */
- alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
- best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
+ alg_in = alloca (sizeof (struct algorithm));
+ best_alg = alloca (sizeof (struct algorithm));
/* If we have a group of zero bits at the low-order part of T, try
multiplying by the remaining bits and then doing a shift. */
alg_out->ops * sizeof *alg_out->log);
}
\f
+/* Find the cheapest way of multiplying a value of mode MODE by VAL.
+   Try three variations:
+
+       - a shift/add sequence based on VAL itself
+       - a shift/add sequence based on -VAL, followed by a negation
+       - a shift/add sequence based on VAL - 1, followed by an addition.
+
+   Return true if the cheapest of these cost less than MULT_COST,
+   describing the algorithm in *ALG and final fixup in *VARIANT.  */
+
+static bool
+choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
+		     struct algorithm *alg, enum mult_variant *variant,
+		     int mult_cost)
+{
+  struct algorithm alg2;
+
+  /* Baseline: synthesize VAL directly; later candidates must beat
+     whichever of ALG / MULT_COST is currently cheapest.  */
+  *variant = basic_variant;
+  synth_mult (alg, val, mult_cost);
+
+  /* This works only if the inverted value actually fits in an
+     `unsigned int' */
+  if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
+    {
+      /* Budget for -VAL is the current best minus the cost of the
+	 final negation that this variant requires.  */
+      synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - negate_cost);
+      alg2.cost += negate_cost;
+      if (alg2.cost < alg->cost)
+	*alg = alg2, *variant = negate_variant;
+    }
+
+  /* This proves very useful for division-by-constant.  */
+  synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost);
+  alg2.cost += add_cost;
+  if (alg2.cost < alg->cost)
+    *alg = alg2, *variant = add_variant;
+
+  return alg->cost < mult_cost;
+}
+
+/* A subroutine of expand_mult, used for constant multiplications.
+   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
+   convenient.  Use the shift/add sequence described by ALG and apply
+   the final fixup specified by VARIANT.  */
+
+static rtx
+expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
+		   rtx target, const struct algorithm *alg,
+		   enum mult_variant variant)
+{
+  /* Running value of the multiplier synthesized so far; used at the
+     end to cross-check the emitted sequence against VAL.  */
+  HOST_WIDE_INT val_so_far;
+  rtx insn, accum, tem;
+  int opno;
+  enum machine_mode nmode;
+
+  /* op0 must be register to make mult_cost match the precomputed
+     shiftadd_cost array.  */
+  op0 = protect_from_queue (op0, 0);
+
+  /* Avoid referencing memory over and over.
+     For speed, but also for correctness when mem is volatile.  */
+  if (GET_CODE (op0) == MEM)
+    op0 = force_reg (mode, op0);
+
+  /* ACCUM starts out either as OP0 or as a zero, depending on
+     the first operation.  */
+
+  if (alg->op[0] == alg_zero)
+    {
+      accum = copy_to_mode_reg (mode, const0_rtx);
+      val_so_far = 0;
+    }
+  else if (alg->op[0] == alg_m)
+    {
+      accum = copy_to_mode_reg (mode, op0);
+      val_so_far = 1;
+    }
+  else
+    abort ();
+
+  /* Replay the remaining shift/add steps recorded by synth_mult.  */
+  for (opno = 1; opno < alg->ops; opno++)
+    {
+      int log = alg->log[opno];
+      int preserve = preserve_subexpressions_p ();
+      rtx shift_subtarget = preserve ? 0 : accum;
+      /* TARGET may only be used for the final step, and only when no
+	 add_variant fixup will follow.  */
+      rtx add_target
+	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
+	   && ! preserve)
+	  ? target : 0;
+      rtx accum_target = preserve ? 0 : accum;
+
+      switch (alg->op[opno])
+	{
+	case alg_shift:
+	  /* t = t << log.  */
+	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
+				build_int_2 (log, 0), NULL_RTX, 0);
+	  val_so_far <<= log;
+	  break;
+
+	case alg_add_t_m2:
+	  /* t = t + (m << log).  */
+	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
+				 add_target ? add_target : accum_target);
+	  val_so_far += (HOST_WIDE_INT) 1 << log;
+	  break;
+
+	case alg_sub_t_m2:
+	  /* t = t - (m << log).  */
+	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
+				 add_target ? add_target : accum_target);
+	  val_so_far -= (HOST_WIDE_INT) 1 << log;
+	  break;
+
+	case alg_add_t2_m:
+	  /* t = (t << log) + m.  */
+	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
+				build_int_2 (log, 0), shift_subtarget,
+				0);
+	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
+				 add_target ? add_target : accum_target);
+	  val_so_far = (val_so_far << log) + 1;
+	  break;
+
+	case alg_sub_t2_m:
+	  /* t = (t << log) - m.  */
+	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
+				build_int_2 (log, 0), shift_subtarget, 0);
+	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
+				 add_target ? add_target : accum_target);
+	  val_so_far = (val_so_far << log) - 1;
+	  break;
+
+	case alg_add_factor:
+	  /* t = t + (t << log), i.e. t *= (1 << log) + 1.  */
+	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
+				 add_target ? add_target : accum_target);
+	  val_so_far += val_so_far << log;
+	  break;
+
+	case alg_sub_factor:
+	  /* t = (t << log) - t, i.e. t *= (1 << log) - 1.  */
+	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
+				 (add_target ? add_target
+				  : preserve ? 0 : tem));
+	  val_so_far = (val_so_far << log) - val_so_far;
+	  break;
+
+	default:
+	  abort ();
+	}
+
+      /* Write a REG_EQUAL note on the last insn so that we can cse
+	 multiplication sequences.  Note that if ACCUM is a SUBREG,
+	 we've set the inner register and must properly indicate
+	 that.  */
+
+      tem = op0, nmode = mode;
+      if (GET_CODE (accum) == SUBREG)
+	{
+	  nmode = GET_MODE (SUBREG_REG (accum));
+	  tem = gen_lowpart (nmode, op0);
+	}
+
+      insn = get_last_insn ();
+      set_unique_reg_note (insn, REG_EQUAL,
+			   gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
+    }
+
+  /* Apply the final fixup chosen by choose_mult_variant.  */
+  if (variant == negate_variant)
+    {
+      val_so_far = -val_so_far;
+      accum = expand_unop (mode, neg_optab, accum, target, 0);
+    }
+  else if (variant == add_variant)
+    {
+      val_so_far = val_so_far + 1;
+      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
+    }
+
+  /* Sanity check: the synthesized sequence must compute exactly VAL.  */
+  if (val != val_so_far)
+    abort ();
+
+  return accum;
+}
+
/* Perform a multiplication and return an rtx for the result.
MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
TARGET is a suggestion for where to store the result (an rtx).
you should swap the two operands if OP0 would be constant. */
rtx
-expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, int unsignedp)
+expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
+ int unsignedp)
{
rtx const_op1 = op1;
+ enum mult_variant variant;
+ struct algorithm algorithm;
/* synth_mult does an `unsigned int' multiply. As long as the mode is
less than or equal in size to `unsigned int' this doesn't matter.
that it seems better to use synth_mult always. */
if (const_op1 && GET_CODE (const_op1) == CONST_INT
- && (unsignedp || ! flag_trapv))
+ && (unsignedp || !flag_trapv))
{
- struct algorithm alg;
- struct algorithm alg2;
- HOST_WIDE_INT val = INTVAL (op1);
- HOST_WIDE_INT val_so_far;
- rtx insn;
- int mult_cost;
- enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
-
- /* op0 must be register to make mult_cost match the precomputed
- shiftadd_cost array. */
- op0 = force_reg (mode, op0);
-
- /* Try to do the computation three ways: multiply by the negative of OP1
- and then negate, do the multiplication directly, or do multiplication
- by OP1 - 1. */
-
- mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
+ int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
mult_cost = MIN (12 * add_cost, mult_cost);
- synth_mult (&alg, val, mult_cost);
+ if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
+ mult_cost))
+ return expand_mult_const (mode, op0, INTVAL (const_op1), target,
+ &algorithm, variant);
+ }
- /* This works only if the inverted value actually fits in an
- `unsigned int' */
- if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
- {
- synth_mult (&alg2, - val,
- (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
- if (alg2.cost + negate_cost < alg.cost)
- alg = alg2, variant = negate_variant;
- }
+ if (GET_CODE (op0) == CONST_DOUBLE)
+ {
+ rtx temp = op0;
+ op0 = op1;
+ op1 = temp;
+ }
- /* This proves very useful for division-by-constant. */
- synth_mult (&alg2, val - 1,
- (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
- if (alg2.cost + add_cost < alg.cost)
- alg = alg2, variant = add_variant;
+ /* Expand x*2.0 as x+x. */
+ if (GET_CODE (op1) == CONST_DOUBLE
+ && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ REAL_VALUE_TYPE d;
+ REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
- if (alg.cost < mult_cost)
+ if (REAL_VALUES_EQUAL (d, dconst2))
{
- /* We found something cheaper than a multiply insn. */
- int opno;
- rtx accum, tem;
- enum machine_mode nmode;
-
- op0 = protect_from_queue (op0, 0);
-
- /* Avoid referencing memory over and over.
- For speed, but also for correctness when mem is volatile. */
- if (GET_CODE (op0) == MEM)
- op0 = force_reg (mode, op0);
-
- /* ACCUM starts out either as OP0 or as a zero, depending on
- the first operation. */
-
- if (alg.op[0] == alg_zero)
- {
- accum = copy_to_mode_reg (mode, const0_rtx);
- val_so_far = 0;
- }
- else if (alg.op[0] == alg_m)
- {
- accum = copy_to_mode_reg (mode, op0);
- val_so_far = 1;
- }
- else
- abort ();
-
- for (opno = 1; opno < alg.ops; opno++)
- {
- int log = alg.log[opno];
- int preserve = preserve_subexpressions_p ();
- rtx shift_subtarget = preserve ? 0 : accum;
- rtx add_target
- = (opno == alg.ops - 1 && target != 0 && variant != add_variant
- && ! preserve)
- ? target : 0;
- rtx accum_target = preserve ? 0 : accum;
-
- switch (alg.op[opno])
- {
- case alg_shift:
- accum = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), NULL_RTX, 0);
- val_so_far <<= log;
- break;
-
- case alg_add_t_m2:
- tem = expand_shift (LSHIFT_EXPR, mode, op0,
- build_int_2 (log, 0), NULL_RTX, 0);
- accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
- add_target
- ? add_target : accum_target);
- val_so_far += (HOST_WIDE_INT) 1 << log;
- break;
-
- case alg_sub_t_m2:
- tem = expand_shift (LSHIFT_EXPR, mode, op0,
- build_int_2 (log, 0), NULL_RTX, 0);
- accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
- add_target
- ? add_target : accum_target);
- val_so_far -= (HOST_WIDE_INT) 1 << log;
- break;
-
- case alg_add_t2_m:
- accum = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), shift_subtarget,
- 0);
- accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
- add_target
- ? add_target : accum_target);
- val_so_far = (val_so_far << log) + 1;
- break;
-
- case alg_sub_t2_m:
- accum = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), shift_subtarget,
- 0);
- accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
- add_target
- ? add_target : accum_target);
- val_so_far = (val_so_far << log) - 1;
- break;
-
- case alg_add_factor:
- tem = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), NULL_RTX, 0);
- accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
- add_target
- ? add_target : accum_target);
- val_so_far += val_so_far << log;
- break;
-
- case alg_sub_factor:
- tem = expand_shift (LSHIFT_EXPR, mode, accum,
- build_int_2 (log, 0), NULL_RTX, 0);
- accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
- (add_target ? add_target
- : preserve ? 0 : tem));
- val_so_far = (val_so_far << log) - val_so_far;
- break;
-
- default:
- abort ();
- }
-
- /* Write a REG_EQUAL note on the last insn so that we can cse
- multiplication sequences. Note that if ACCUM is a SUBREG,
- we've set the inner register and must properly indicate
- that. */
-
- tem = op0, nmode = mode;
- if (GET_CODE (accum) == SUBREG)
- {
- nmode = GET_MODE (SUBREG_REG (accum));
- tem = gen_lowpart (nmode, op0);
- }
-
- insn = get_last_insn ();
- set_unique_reg_note (insn,
- REG_EQUAL,
- gen_rtx_MULT (nmode, tem,
- GEN_INT (val_so_far)));
- }
-
- if (variant == negate_variant)
- {
- val_so_far = - val_so_far;
- accum = expand_unop (mode, neg_optab, accum, target, 0);
- }
- else if (variant == add_variant)
- {
- val_so_far = val_so_far + 1;
- accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
- }
-
- if (val != val_so_far)
- abort ();
-
- return accum;
+ op0 = force_reg (GET_MODE (op0), op0);
+ return expand_binop (mode, add_optab, op0, op0,
+ target, unsignedp, OPTAB_LIB_WIDEN);
}
}
abort ();
if (mhigh_hi > 1 || mlow_hi > 1)
abort ();
- /* assert that mlow < mhigh. */
+ /* Assert that mlow < mhigh. */
if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
abort ();
return target;
}
-/* Emit code to multiply OP0 and CNST1, putting the high half of the result
- in TARGET if that is convenient, and return where the result is. If the
- operation can not be performed, 0 is returned.
+/* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
- MODE is the mode of operation and result.
+static rtx
+extract_high_half (enum machine_mode mode, rtx op)
+{
+ enum machine_mode wider_mode;
- UNSIGNEDP nonzero means unsigned multiply.
+ if (mode == word_mode)
+ return gen_highpart (mode, op);
- MAX_COST is the total allowed cost for the expanded RTL. */
+ wider_mode = GET_MODE_WIDER_MODE (mode);
+ op = expand_shift (RSHIFT_EXPR, wider_mode, op,
+ build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1);
+ return convert_modes (mode, wider_mode, op, 0);
+}
-rtx
-expand_mult_highpart (enum machine_mode mode, rtx op0,
- unsigned HOST_WIDE_INT cnst1, rtx target,
- int unsignedp, int max_cost)
+/* Like expand_mult_highpart, but only consider using a multiplication
+ optab. OP1 is an rtx for the constant operand. */
+
+static rtx
+expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
+ rtx target, int unsignedp, int max_cost)
{
- enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
- optab mul_highpart_optab;
+ enum machine_mode wider_mode;
optab moptab;
rtx tem;
- int size = GET_MODE_BITSIZE (mode);
- rtx op1, wide_op1;
-
- /* We can't support modes wider than HOST_BITS_PER_INT. */
- if (size > HOST_BITS_PER_WIDE_INT)
- abort ();
-
- op1 = gen_int_mode (cnst1, mode);
-
- wide_op1
- = immed_double_const (cnst1,
- (unsignedp
- ? (HOST_WIDE_INT) 0
- : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
- wider_mode);
-
- /* expand_mult handles constant multiplication of word_mode
- or narrower. It does a poor job for large modes. */
- if (size < BITS_PER_WORD
- && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
- {
- /* We have to do this, since expand_binop doesn't do conversion for
- multiply. Maybe change expand_binop to handle widening multiply? */
- op0 = convert_to_mode (wider_mode, op0, unsignedp);
-
- /* We know that this can't have signed overflow, so pretend this is
- an unsigned multiply. */
- tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
- tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
- build_int_2 (size, 0), NULL_RTX, 1);
- return convert_modes (mode, wider_mode, tem, unsignedp);
- }
+ int size;
- if (target == 0)
- target = gen_reg_rtx (mode);
+ wider_mode = GET_MODE_WIDER_MODE (mode);
+ size = GET_MODE_BITSIZE (mode);
/* Firstly, try using a multiplication insn that only generates the needed
high part of the product, and in the sign flavor of unsignedp. */
if (mul_highpart_cost[(int) mode] < max_cost)
{
- mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
- target = expand_binop (mode, mul_highpart_optab,
- op0, op1, target, unsignedp, OPTAB_DIRECT);
- if (target)
- return target;
+ moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
+ tem = expand_binop (mode, moptab, op0, op1, target,
+ unsignedp, OPTAB_DIRECT);
+ if (tem)
+ return tem;
}
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
&& (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
< max_cost))
{
- mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
- target = expand_binop (mode, mul_highpart_optab,
- op0, op1, target, unsignedp, OPTAB_DIRECT);
- if (target)
+ moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
+ tem = expand_binop (mode, moptab, op0, op1, target,
+ unsignedp, OPTAB_DIRECT);
+ if (tem)
/* We used the wrong signedness. Adjust the result. */
- return expand_mult_highpart_adjust (mode, target, op0,
- op1, target, unsignedp);
+ return expand_mult_highpart_adjust (mode, tem, op0, op1,
+ tem, unsignedp);
}
/* Try widening multiplication. */
if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
&& mul_widen_cost[(int) wider_mode] < max_cost)
{
- op1 = force_reg (mode, op1);
- goto try;
+ tem = expand_binop (wider_mode, moptab, op0, op1, 0,
+ unsignedp, OPTAB_WIDEN);
+ if (tem)
+ return extract_high_half (mode, tem);
}
/* Try widening the mode and perform a non-widening multiplication. */
&& size - 1 < BITS_PER_WORD
&& mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
{
- op1 = wide_op1;
- goto try;
+ tem = expand_binop (wider_mode, moptab, op0, op1, 0,
+ unsignedp, OPTAB_WIDEN);
+ if (tem)
+ return extract_high_half (mode, tem);
}
/* Try widening multiplication of opposite signedness, and adjust. */
NULL_RTX, ! unsignedp, OPTAB_WIDEN);
if (tem != 0)
{
- /* Extract the high half of the just generated product. */
- tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
- build_int_2 (size, 0), NULL_RTX, 1);
- tem = convert_modes (mode, wider_mode, tem, unsignedp);
+ tem = extract_high_half (mode, tem);
/* We used the wrong signedness. Adjust the result. */
return expand_mult_highpart_adjust (mode, tem, op0, op1,
target, unsignedp);
}
return 0;
+}
- try:
- /* Pass NULL_RTX as target since TARGET has wrong mode. */
- tem = expand_binop (wider_mode, moptab, op0, op1,
- NULL_RTX, unsignedp, OPTAB_WIDEN);
- if (tem == 0)
- return 0;
+/* Emit code to multiply OP0 and CNST1, putting the high half of the result
+ in TARGET if that is convenient, and return where the result is. If the
+ operation can not be performed, 0 is returned.
- /* Extract the high half of the just generated product. */
- if (mode == word_mode)
+ MODE is the mode of operation and result.
+
+ UNSIGNEDP nonzero means unsigned multiply.
+
+ MAX_COST is the total allowed cost for the expanded RTL. */
+
+rtx
+expand_mult_highpart (enum machine_mode mode, rtx op0,
+ unsigned HOST_WIDE_INT cnst1, rtx target,
+ int unsignedp, int max_cost)
+{
+ enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
+ int extra_cost;
+ bool sign_adjust = false;
+ enum mult_variant variant;
+ struct algorithm alg;
+ rtx op1, tem;
+
+ /* We can't support modes wider than HOST_BITS_PER_INT. */
+ if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
+ abort ();
+
+ op1 = gen_int_mode (cnst1, mode);
+ cnst1 &= GET_MODE_MASK (mode);
+
+ /* We can't optimize modes wider than BITS_PER_WORD.
+ ??? We might be able to perform double-word arithmetic if
+ mode == word_mode, however all the cost calculations in
+ synth_mult etc. assume single-word operations. */
+ if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
+ return expand_mult_highpart_optab (mode, op0, op1, target,
+ unsignedp, max_cost);
+
+ extra_cost = shift_cost[GET_MODE_BITSIZE (mode) - 1];
+
+ /* Check whether we try to multiply by a negative constant. */
+ if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
{
- return gen_highpart (mode, tem);
+ sign_adjust = true;
+ extra_cost += add_cost;
}
- else
+
+ /* See whether shift/add multiplication is cheap enough. */
+ if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
+ max_cost - extra_cost))
{
- tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
- build_int_2 (size, 0), NULL_RTX, 1);
- return convert_modes (mode, wider_mode, tem, unsignedp);
+ /* See whether the specialized multiplication optabs are
+ cheaper than the shift/add version. */
+ tem = expand_mult_highpart_optab (mode, op0, op1, target,
+ unsignedp, alg.cost + extra_cost);
+ if (tem)
+ return tem;
+
+ tem = convert_to_mode (wider_mode, op0, unsignedp);
+ tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
+ tem = extract_high_half (mode, tem);
+
+ /* Adjust result for signedness. */
+ if (sign_adjust)
+ tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
+
+ return tem;
}
+ return expand_mult_highpart_optab (mode, op0, op1, target,
+ unsignedp, max_cost);
}
\f
/* Emit the code to divide OP0 by OP1, putting the result in TARGET
build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
quotient = expand_mult (compute_mode, t1,
gen_int_mode (ml, compute_mode),
- NULL_RTX, 0);
+ NULL_RTX, 1);
insn = get_last_insn ();
set_unique_reg_note (insn,
{
case CONST_INT:
t = build_int_2 (INTVAL (x),
- (TREE_UNSIGNED (type)
- && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
+ (TYPE_UNSIGNED (type)
+ && (GET_MODE_BITSIZE (TYPE_MODE (type))
+ < HOST_BITS_PER_WIDE_INT))
|| INTVAL (x) >= 0 ? 0 : -1);
TREE_TYPE (t) = type;
return t;
make_tree (type, XEXP (x, 1))));
case LSHIFTRT:
- t = (*lang_hooks.types.unsigned_type) (type);
+ t = lang_hooks.types.unsigned_type (type);
return fold (convert (type,
build (RSHIFT_EXPR, t,
make_tree (t, XEXP (x, 0)),
make_tree (type, XEXP (x, 1)))));
case ASHIFTRT:
- t = (*lang_hooks.types.signed_type) (type);
+ t = lang_hooks.types.signed_type (type);
return fold (convert (type,
build (RSHIFT_EXPR, t,
make_tree (t, XEXP (x, 0)),
case DIV:
if (TREE_CODE (type) != REAL_TYPE)
- t = (*lang_hooks.types.signed_type) (type);
+ t = lang_hooks.types.signed_type (type);
else
t = type;
make_tree (t, XEXP (x, 0)),
make_tree (t, XEXP (x, 1)))));
case UDIV:
- t = (*lang_hooks.types.unsigned_type) (type);
+ t = lang_hooks.types.unsigned_type (type);
return fold (convert (type,
build (TRUNC_DIV_EXPR, t,
make_tree (t, XEXP (x, 0)),
case SIGN_EXTEND:
case ZERO_EXTEND:
- t = (*lang_hooks.types.type_for_mode) (GET_MODE (XEXP (x, 0)),
- GET_CODE (x) == ZERO_EXTEND);
+ t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
+ GET_CODE (x) == ZERO_EXTEND);
return fold (convert (type, make_tree (t, XEXP (x, 0))));
default:
t = make_node (RTL_EXPR);
TREE_TYPE (t) = type;
-#ifdef POINTERS_EXTEND_UNSIGNED
/* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
ptr_mode. So convert. */
- if (POINTER_TYPE_P (type) && GET_MODE (x) != TYPE_MODE (type))
+ if (POINTER_TYPE_P (type))
x = convert_memory_address (TYPE_MODE (type), x);
-#endif
RTL_EXPR_RTL (t) = x;
/* There are no insns to be output
{
tree type, mult_type, add_type, result;
- type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
+ type = lang_hooks.types.type_for_mode (mode, unsignedp);
/* In order to get a proper overflow indication from an unsigned
type, we have to pretend that it's a sizetype. */
}
add_type = (GET_MODE (add) == VOIDmode ? mult_type
- : (*lang_hooks.types.type_for_mode) (GET_MODE (add), unsignedp));
+ : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
result = fold (build (PLUS_EXPR, mult_type,
fold (build (MULT_EXPR, mult_type,
expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
int unsignedp)
{
- tree type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
+ tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
tree add_type = (GET_MODE (add) == VOIDmode
- ? type: (*lang_hooks.types.type_for_mode) (GET_MODE (add),
- unsignedp));
+ ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
+ unsignedp));
tree result = fold (build (PLUS_EXPR, type,
fold (build (MULT_EXPR, type,
make_tree (type, x),