* gengtype.c (adjust_field_rtx_def): Add NOTE_INSN_DELETED_LABEL's

[pf3gnuchains/gcc-fork.git] / gcc / expmed.c
diff --git a/gcc/expmed.c b/gcc/expmed.c

index 5e8d7f3..16f7415 100644 (file)
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -1,7 +1,7 @@
  /* Medium-level subroutines: convert bit-field store and extract
     and shifts, multiplies and divides to rtl instructions.
     Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
-   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
+   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
     Free Software Foundation, Inc.
  
  This file is part of GCC.
@@ -103,7 +103,8 @@ static int add_cost[2][NUM_MACHINE_MODES];
  static int neg_cost[2][NUM_MACHINE_MODES];
  static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
  static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
-static int shiftsub_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
+static int shiftsub0_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
+static int shiftsub1_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
  static int mul_cost[2][NUM_MACHINE_MODES];
  static int sdiv_cost[2][NUM_MACHINE_MODES];
  static int udiv_cost[2][NUM_MACHINE_MODES];
@@ -130,7 +131,8 @@ init_expmed (void)
      struct rtx_def shift;      rtunion shift_fld1;
      struct rtx_def shift_mult; rtunion shift_mult_fld1;
      struct rtx_def shift_add;  rtunion shift_add_fld1;
-    struct rtx_def shift_sub;  rtunion shift_sub_fld1;
+    struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
+    struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
    } all;
  
    rtx pow2[MAX_BITS_PER_WORD];
@@ -201,14 +203,18 @@ init_expmed (void)
    XEXP (&all.shift_add, 0) = &all.shift_mult;
    XEXP (&all.shift_add, 1) = &all.reg;
  
-  PUT_CODE (&all.shift_sub, MINUS);
-  XEXP (&all.shift_sub, 0) = &all.shift_mult;
-  XEXP (&all.shift_sub, 1) = &all.reg;
+  PUT_CODE (&all.shift_sub0, MINUS);
+  XEXP (&all.shift_sub0, 0) = &all.shift_mult;
+  XEXP (&all.shift_sub0, 1) = &all.reg;
+
+  PUT_CODE (&all.shift_sub1, MINUS);
+  XEXP (&all.shift_sub1, 0) = &all.reg;
+  XEXP (&all.shift_sub1, 1) = &all.shift_mult;
  
    for (speed = 0; speed < 2; speed++)
      {
        crtl->maybe_hot_insn_p = speed;
-      zero_cost[speed] = rtx_cost (const0_rtx, 0, speed);
+      zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
  
        for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
            mode != VOIDmode;
@@ -226,7 +232,8 @@ init_expmed (void)
           PUT_MODE (&all.shift, mode);
           PUT_MODE (&all.shift_mult, mode);
           PUT_MODE (&all.shift_add, mode);
-         PUT_MODE (&all.shift_sub, mode);
+         PUT_MODE (&all.shift_sub0, mode);
+         PUT_MODE (&all.shift_sub1, mode);
  
           add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
           neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
@@ -254,8 +261,8 @@ init_expmed (void)
             }
  
           shift_cost[speed][mode][0] = 0;
-         shiftadd_cost[speed][mode][0] = shiftsub_cost[speed][mode][0]
-           = add_cost[speed][mode];
+         shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
+           = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
  
           n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
           for (m = 1; m < n; m++)
@@ -265,7 +272,8 @@ init_expmed (void)
  
               shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
               shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
-             shiftsub_cost[speed][mode][m] = rtx_cost (&all.shift_sub, SET, speed);
+             shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
+             shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
             }
         }
      }
@@ -532,6 +540,7 @@ store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
        int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
        rtx insn;
        rtx start = get_last_insn ();
+      rtx arg0 = op0;
  
        /* Get appropriate low part of the value being stored.  */
        if (GET_CODE (value) == CONST_INT || REG_P (value))
@@ -552,11 +561,11 @@ store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
                       || GET_MODE_CLASS (fieldmode) == MODE_INT
                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
-         op0 = SUBREG_REG (op0);
+         arg0 = SUBREG_REG (op0);
         }
  
        insn = (GEN_FCN (icode)
-                (gen_rtx_SUBREG (fieldmode, op0,
+                (gen_rtx_SUBREG (fieldmode, arg0,
                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                                   + (offset * UNITS_PER_WORD)),
                                   value));
@@ -748,6 +757,16 @@ store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
        if (pat)
         {
           emit_insn (pat);
+
+         /* If the mode of the insertion is wider than the mode of the
+            target register, we created a paradoxical subreg for the
+            target.  Explicitly truncate that paradoxical subreg back
+            into the target register itself.  */
+         if (!TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (op0)),
+                                     GET_MODE_BITSIZE (op_mode))
+             && (REG_P (xop0)
+                 || GET_CODE (xop0) == SUBREG))
+             convert_move (op0, xop0, true);
           return true;
         }
        delete_insns_since (last);
@@ -1278,9 +1297,8 @@ extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
        {
         if (MEM_P (op0))
           op0 = adjust_address (op0, imode, 0);
-       else
+       else if (imode != BLKmode)
           {
-           gcc_assert (imode != BLKmode);
             op0 = gen_lowpart (imode, op0);
  
             /* If we got a SUBREG, force it into a register since we
@@ -1288,6 +1306,24 @@ extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
             if (GET_CODE (op0) == SUBREG)
               op0 = force_reg (imode, op0);
           }
+       else if (REG_P (op0))
+         {
+           rtx reg, subreg;
+           imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
+                                           MODE_INT);
+           reg = gen_reg_rtx (imode);
+           subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
+           emit_move_insn (subreg, op0);
+           op0 = reg;
+           bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
+         }
+       else
+         {
+           rtx mem = assign_stack_temp (GET_MODE (op0),
+                                        GET_MODE_SIZE (GET_MODE (op0)), 0);
+           emit_move_insn (mem, op0);
+           op0 = adjust_address (mem, BLKmode, 0);
+         }
        }
    }
  
@@ -1516,7 +1552,13 @@ extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
  
        if (GET_MODE (xtarget) != ext_mode)
         {
-         if (REG_P (xtarget))
+         /* Don't use LHS paradoxical subreg if explicit truncation is needed
+            between the mode of the extraction (word_mode) and the target
+            mode.  Instead, create a temporary and use convert_move to set
+            the target.  */
+         if (REG_P (xtarget)
+             && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)),
+                                       GET_MODE_BITSIZE (ext_mode)))
             {
               xtarget = gen_lowpart (ext_mode, xtarget);
               if (GET_MODE_SIZE (ext_mode)
@@ -2102,7 +2144,8 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
                        % GET_MODE_BITSIZE (mode));
        else if (GET_CODE (op1) == SUBREG
-              && subreg_lowpart_p (op1))
+              && subreg_lowpart_p (op1)
+              && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
         op1 = SUBREG_REG (op1);
      }
  
@@ -2362,6 +2405,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
    struct mult_cost best_cost;
    struct mult_cost new_limit;
    int op_cost, op_latency;
+  unsigned HOST_WIDE_INT orig_t = t;
    unsigned HOST_WIDE_INT q;
    int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
    int hash_index;
@@ -2507,6 +2551,38 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
               best_alg->log[best_alg->ops] = m;
               best_alg->op[best_alg->ops] = alg_shift;
             }
+
+         /* See if treating ORIG_T as a signed number yields a better
+            sequence.  Try this sequence only for a negative ORIG_T
+            as it would be useless for a non-negative ORIG_T.  */
+         if ((HOST_WIDE_INT) orig_t < 0)
+           {
+             /* Shift ORIG_T as follows because a right shift of a
+                negative-valued signed type is implementation
+                defined.  */
+             q = ~(~orig_t >> m);
+             /* The function expand_shift will choose between a shift
+                and a sequence of additions, so the observed cost is
+                given as MIN (m * add_cost[speed][mode],
+                shift_cost[speed][mode][m]).  */
+             op_cost = m * add_cost[speed][mode];
+             if (shift_cost[speed][mode][m] < op_cost)
+               op_cost = shift_cost[speed][mode][m];
+             new_limit.cost = best_cost.cost - op_cost;
+             new_limit.latency = best_cost.latency - op_cost;
+             synth_mult (alg_in, q, &new_limit, mode);
+
+             alg_in->cost.cost += op_cost;
+             alg_in->cost.latency += op_cost;
+             if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
+               {
+                 struct algorithm *x;
+                 best_cost = alg_in->cost;
+                 x = alg_in, alg_in = best_alg, best_alg = x;
+                 best_alg->log[best_alg->ops] = m;
+                 best_alg->op[best_alg->ops] = alg_shift;
+               }
+           }
         }
        if (cache_hit)
         goto done;
@@ -2569,6 +2645,29 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
               best_alg->op[best_alg->ops] = alg_add_t_m2;
             }
         }
+
+      /* We may be able to calculate a * -7, a * -15, a * -31, etc.
+        quickly with a - a * n for some appropriate constant n.  */
+      m = exact_log2 (-orig_t + 1);
+      if (m >= 0 && m < maxm)
+       {
+         op_cost = shiftsub1_cost[speed][mode][m];
+         new_limit.cost = best_cost.cost - op_cost;
+         new_limit.latency = best_cost.latency - op_cost;
+         synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
+
+         alg_in->cost.cost += op_cost;
+         alg_in->cost.latency += op_cost;
+         if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
+           {
+             struct algorithm *x;
+             best_cost = alg_in->cost;
+             x = alg_in, alg_in = best_alg, best_alg = x;
+             best_alg->log[best_alg->ops] = m;
+             best_alg->op[best_alg->ops] = alg_sub_t_m2;
+           }
+       }
+
        if (cache_hit)
         goto done;
      }
@@ -2638,9 +2737,9 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
              hardware the shift may be executed concurrently with the
              earlier steps in the algorithm.  */
           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
-         if (shiftsub_cost[speed][mode][m] < op_cost)
+         if (shiftsub0_cost[speed][mode][m] < op_cost)
             {
-             op_cost = shiftsub_cost[speed][mode][m];
+             op_cost = shiftsub0_cost[speed][mode][m];
               op_latency = op_cost;
             }
           else
@@ -2703,7 +2802,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
        m = exact_log2 (q);
        if (m >= 0 && m < maxm)
         {
-         op_cost = shiftsub_cost[speed][mode][m];
+         op_cost = shiftsub0_cost[speed][mode][m];
           new_limit.cost = best_cost.cost - op_cost;
           new_limit.latency = best_cost.latency - op_cost;
           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
@@ -3963,10 +4062,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
                       {
                         /* Most significant bit of divisor is set; emit an scc
                            insn.  */
-                       quotient = emit_store_flag (tquotient, GEU, op0, op1,
-                                                   compute_mode, 1, 1);
-                       if (quotient == 0)
-                         goto fail1;
+                       quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
+                                                         compute_mode, 1, 1);
                       }
                     else
                       {
@@ -5110,8 +5207,9 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
    enum insn_code icode;
    enum machine_mode compare_mode;
    enum machine_mode target_mode = GET_MODE (target);
+  enum mode_class mclass;
    rtx tem;
-  rtx last = get_last_insn ();
+  rtx last;
    rtx pattern, comparison;
  
    if (unsignedp)
@@ -5245,117 +5343,41 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
        return op0;
      }
  
-  icode = setcc_gen_code[(int) code];
-
-  if (icode != CODE_FOR_nothing)
+  mclass = GET_MODE_CLASS (mode);
+  for (compare_mode = mode; compare_mode != VOIDmode;
+       compare_mode = GET_MODE_WIDER_MODE (compare_mode))
      {
-      insn_operand_predicate_fn pred;
-
-      /* We think we may be able to do this with a scc insn.  Emit the
-        comparison and then the scc insn.  */
-
-      do_pending_stack_adjust ();
-      last = get_last_insn ();
-
-      comparison
-       = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
-      if (CONSTANT_P (comparison))
-       {
-         switch (GET_CODE (comparison))
-           {
-           case CONST_INT:
-             if (comparison == const0_rtx)
-               return const0_rtx;
-             break;
-             
-#ifdef FLOAT_STORE_FLAG_VALUE
-           case CONST_DOUBLE:
-             if (comparison == CONST0_RTX (GET_MODE (comparison)))
-               return const0_rtx;
-             break;
-#endif
-           default:
-             gcc_unreachable ();
-           }
-         
-         if (normalizep == 1)
-           return const1_rtx;
-         if (normalizep == -1)
-           return constm1_rtx;
-         return const_true_rtx;
-       }
-
-      /* The code of COMPARISON may not match CODE if compare_from_rtx
-        decided to swap its operands and reverse the original code.
-
-        We know that compare_from_rtx returns either a CONST_INT or
-        a new comparison code, so it is safe to just extract the
-        code from COMPARISON.  */
-      code = GET_CODE (comparison);
-
-      /* Get a reference to the target in the proper mode for this insn.  */
-      compare_mode = insn_data[(int) icode].operand[0].mode;
-      subtarget = target;
-      pred = insn_data[(int) icode].operand[0].predicate;
-      if (optimize || ! (*pred) (subtarget, compare_mode))
-       subtarget = gen_reg_rtx (compare_mode);
-
-      pattern = GEN_FCN (icode) (subtarget);
-      if (pattern)
-       {
-         emit_insn (pattern);
-         return emit_store_flag_1 (target, subtarget, compare_mode,
-                                   normalizep);
-       }
-    }
-  else
-    {
-      /* We don't have an scc insn, so try a cstore insn.  */
-
-      for (compare_mode = mode; compare_mode != VOIDmode;
-          compare_mode = GET_MODE_WIDER_MODE (compare_mode))
-       {
-         icode = optab_handler (cstore_optab, compare_mode)->insn_code;
-         if (icode != CODE_FOR_nothing)
-           break;
-       }
-
-      if (icode != CODE_FOR_nothing)
+     enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
+     icode = optab_handler (cstore_optab, optab_mode)->insn_code;
+     if (icode != CODE_FOR_nothing)
         {
+         rtx x, y;
           enum machine_mode result_mode
             = insn_data[(int) icode].operand[0].mode;
-         rtx cstore_op0 = op0;
-         rtx cstore_op1 = op1;
  
           do_pending_stack_adjust ();
           last = get_last_insn ();
  
-         if (compare_mode != mode)
+          x = prepare_operand (icode, op0, 2, mode, compare_mode, unsignedp);
+          y = prepare_operand (icode, op1, 3, mode, compare_mode, unsignedp);
+         comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
+         if (!x || !y
+             || !insn_data[icode].operand[2].predicate
+                 (x, insn_data[icode].operand[2].mode)
+             || !insn_data[icode].operand[3].predicate
+                 (y, insn_data[icode].operand[3].mode)
+             || !insn_data[icode].operand[1].predicate (comparison, VOIDmode))
             {
-             cstore_op0 = convert_modes (compare_mode, mode, cstore_op0,
-                                         unsignedp);
-             cstore_op1 = convert_modes (compare_mode, mode, cstore_op1,
-                                         unsignedp);
+             delete_insns_since (last);
+             continue;
             }
-         
-         if (!insn_data[(int) icode].operand[2].predicate (cstore_op0,
-                                                           compare_mode))
-           cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0);
  
-         if (!insn_data[(int) icode].operand[3].predicate (cstore_op1,
-                                                           compare_mode))
-           cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1);
-
-         comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0,
-                                      cstore_op1);
           subtarget = target;
-
           if (optimize || !(insn_data[(int) icode].operand[0].predicate
                             (subtarget, result_mode)))
             subtarget = gen_reg_rtx (result_mode);
  
-         pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0,
-                                    cstore_op1);
+         pattern = GEN_FCN (icode) (subtarget, comparison, x, y);
  
           if (pattern)
             {
@@ -5363,10 +5385,13 @@ emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
               return emit_store_flag_1 (target, subtarget, result_mode,
                                         normalizep);
             }
+
+         delete_insns_since (last);
+         break;
         }
      }
  
-  delete_insns_since (last);
+  last = get_last_insn ();
  
    /* If optimizing, use different pseudo registers for each insn, instead
       of reusing the same pseudo.  This leads to better CSE, but slows