2006-09-28 Steven G. Kargl <kargl@gcc.gnu.org>

[pf3gnuchains/gcc-fork.git] / gcc / optabs.c
diff --git a/gcc/optabs.c b/gcc/optabs.c

index a72028e..fe7ae06 100644 (file)
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -1,6 +1,6 @@
  /* Expand the basic unary and binary arithmetic operations, for GNU compiler.
     Copyright (C) 1987, 1988, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
-   1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
  
  This file is part of GCC.
  
@@ -294,6 +294,12 @@ optab_for_tree_code (enum tree_code code, tree type)
      case REALIGN_LOAD_EXPR:
        return vec_realign_load_optab;
  
+    case WIDEN_SUM_EXPR:
+      return TYPE_UNSIGNED (type) ? usum_widen_optab : ssum_widen_optab;
+
+    case DOT_PROD_EXPR:
+      return TYPE_UNSIGNED (type) ? udot_prod_optab : sdot_prod_optab;
+
      case REDUC_MAX_EXPR:
        return TYPE_UNSIGNED (type) ? reduc_umax_optab : reduc_smax_optab;
  
@@ -337,6 +343,154 @@ optab_for_tree_code (enum tree_code code, tree type)
  }
  \f
  
+/* Expand the vector widening operation EXP via its optab's insn pattern and
+   return the result, placed in TARGET if the pattern accepts it, else in a
+   new pseudo.  UNSIGNEDP is passed to convert_modes for operand conversions.
+
+   There are two different classes of operations handled here:
+   1) Operations whose result is wider than all the arguments to the operation.
+      Examples: VEC_UNPACK_HI/LO_EXPR, VEC_WIDEN_MULT_HI/LO_EXPR
+      In this case OP0 and optionally OP1 would be initialized,
+      but WIDE_OP wouldn't (not relevant for this case).
+   2) Operations whose result is of the same size as the last argument to the
+      operation, but wider than all the other arguments to the operation.
+      Examples: WIDEN_SUM_EXPR, DOT_PROD_EXPR.  In this case WIDE_OP, OP0
+      and optionally OP1 would be initialized.
+   E.g., this is how the arguments are initialized for these operations:
+                                nops    OP0     OP1     WIDE_OP
+   widening-sum                 2       oprnd0  -       oprnd1
+   widening-dot-product         3       oprnd0  oprnd1  oprnd2
+   widening-mult                2       oprnd0  oprnd1  -
+   type-promotion (vec-unpack)  1       oprnd0  -       -  */
+
+rtx
+expand_widen_pattern_expr (tree exp, rtx op0, rtx op1, rtx wide_op, rtx target,
+                           int unsignedp)
+{   
+  tree oprnd0, oprnd1, oprnd2;
+  enum machine_mode wmode = 0, tmode0, tmode1 = 0;
+  optab widen_pattern_optab;
+  int icode; 
+  enum machine_mode xmode0, xmode1 = 0, wxmode = 0;
+  rtx temp;
+  rtx pat;
+  rtx xop0, xop1, wxop;
+  int nops = TREE_CODE_LENGTH (TREE_CODE (exp));
+
+  oprnd0 = TREE_OPERAND (exp, 0);
+  tmode0 = TYPE_MODE (TREE_TYPE (oprnd0));
+  widen_pattern_optab =
+        optab_for_tree_code (TREE_CODE (exp), TREE_TYPE (oprnd0));
+  icode = (int) widen_pattern_optab->handlers[(int) tmode0].insn_code;
+  gcc_assert (icode != CODE_FOR_nothing);
+  xmode0 = insn_data[icode].operand[1].mode;
+
+  if (nops >= 2)
+    {
+      oprnd1 = TREE_OPERAND (exp, 1);
+      tmode1 = TYPE_MODE (TREE_TYPE (oprnd1));
+      xmode1 = insn_data[icode].operand[2].mode;
+    }
+
+  /* The last operand is of a wider mode than the rest of the operands.  */
+  if (nops == 2)
+    {
+      wmode = tmode1;
+      wxmode = xmode1;
+    }
+  else if (nops == 3)
+    {
+      gcc_assert (tmode1 == tmode0);
+      gcc_assert (op1);
+      oprnd2 = TREE_OPERAND (exp, 2);
+      wmode = TYPE_MODE (TREE_TYPE (oprnd2));
+      wxmode = insn_data[icode].operand[3].mode;
+    }
+
+  if (!wide_op)  /* Class 1: the result mode comes from the pattern.  */
+    wmode = wxmode = insn_data[icode].operand[0].mode;
+
+  if (!target
+      || ! (*insn_data[icode].operand[0].predicate) (target, wmode))
+    temp = gen_reg_rtx (wmode);
+  else
+    temp = target;
+
+  xop0 = op0;
+  xop1 = op1;
+  wxop = wide_op;
+
+  /* In case the insn wants input operands in modes different from
+     those of the actual operands, convert the operands.  It would
+     seem that we don't need to convert CONST_INTs, but we do, so
+     that they're properly zero-extended, sign-extended or truncated
+     for their mode.  */
+
+  if (GET_MODE (op0) != xmode0 && xmode0 != VOIDmode)
+    xop0 = convert_modes (xmode0,
+                          GET_MODE (op0) != VOIDmode
+                          ? GET_MODE (op0)
+                          : tmode0,
+                          xop0, unsignedp);
+
+  if (op1)
+    if (GET_MODE (op1) != xmode1 && xmode1 != VOIDmode)
+      xop1 = convert_modes (xmode1,
+                            GET_MODE (op1) != VOIDmode
+                            ? GET_MODE (op1)
+                            : tmode1,
+                            xop1, unsignedp);
+
+  if (wide_op)
+    if (GET_MODE (wide_op) != wxmode && wxmode != VOIDmode)
+      wxop = convert_modes (wxmode,
+                            GET_MODE (wide_op) != VOIDmode
+                            ? GET_MODE (wide_op)
+                            : wmode,
+                            wxop, unsignedp);
+
+  /* Now, if insn's predicates don't allow our operands, put them into
+     pseudo regs; then generate the insn with the operands we were given.  */
+
+  if (! (*insn_data[icode].operand[1].predicate) (xop0, xmode0)
+      && xmode0 != VOIDmode)
+    xop0 = copy_to_mode_reg (xmode0, xop0);
+
+  if (op1)
+    {
+      if (! (*insn_data[icode].operand[2].predicate) (xop1, xmode1)
+          && xmode1 != VOIDmode)
+        xop1 = copy_to_mode_reg (xmode1, xop1);
+
+      if (wide_op)  /* WIDE_OP is insn operand 3 here.  */
+        {
+          if (! (*insn_data[icode].operand[3].predicate) (wxop, wxmode)
+              && wxmode != VOIDmode)
+            wxop = copy_to_mode_reg (wxmode, wxop);
+
+          pat = GEN_FCN (icode) (temp, xop0, xop1, wxop);
+        }
+      else
+        pat = GEN_FCN (icode) (temp, xop0, xop1);
+    }
+  else
+    {
+      if (wide_op)  /* WIDE_OP is insn operand 2 here.  */
+        {
+          if (! (*insn_data[icode].operand[2].predicate) (wxop, wxmode)
+              && wxmode != VOIDmode)
+            wxop = copy_to_mode_reg (wxmode, wxop);
+
+          pat = GEN_FCN (icode) (temp, xop0, wxop);
+        }
+      else
+        pat = GEN_FCN (icode) (temp, xop0);
+    }
+
+  emit_insn (pat);
+  return temp;
+}
+
  /* Generate code to perform an operation specified by TERNARY_OPTAB
     on operands OP0, OP1 and OP2, with result having machine-mode MODE.
  
@@ -427,9 +581,14 @@ simplify_expand_binop (enum machine_mode mode, optab binoptab,
                        enum optab_methods methods)
  {
    if (CONSTANT_P (op0) && CONSTANT_P (op1))
-    return simplify_gen_binary (binoptab->code, mode, op0, op1);
-  else
-    return expand_binop (mode, binoptab, op0, op1, target, unsignedp, methods);
+    {
+      rtx x = simplify_binary_operation (binoptab->code, mode, op0, op1);
+
+      if (x)
+       return x;
+    }
+
+  return expand_binop (mode, binoptab, op0, op1, target, unsignedp, methods);
  }
  
  /* Like simplify_expand_binop, but always put the result in TARGET.
@@ -800,8 +959,10 @@ expand_doubleword_shift (enum machine_mode op1_mode, optab binoptab,
    subword_label = gen_label_rtx ();
    done_label = gen_label_rtx ();
  
+  NO_DEFER_POP;
    do_compare_rtx_and_jump (cmp1, cmp2, cmp_code, false, op1_mode,
                            0, 0, subword_label);
+  OK_DEFER_POP;
  
    if (!expand_superword_shift (binoptab, outof_input, superword_op1,
                                outof_target, into_target,
@@ -1399,7 +1560,7 @@ expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
           if (expand_doubleword_shift (op1_mode, binoptab,
                                        outof_input, into_input, op1,
                                        outof_target, into_target,
-                                      unsignedp, methods, shift_mask))
+                                      unsignedp, next_methods, shift_mask))
             {
               insns = get_insns ();
               end_sequence ();
@@ -1420,7 +1581,7 @@ expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
        && ashl_optab->handlers[(int) word_mode].insn_code != CODE_FOR_nothing
        && lshr_optab->handlers[(int) word_mode].insn_code != CODE_FOR_nothing)
      {
-      rtx insns, equiv_value;
+      rtx insns;
        rtx into_target, outof_target;
        rtx into_input, outof_input;
        rtx inter;
@@ -1520,20 +1681,12 @@ expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
  
        if (inter != 0)
         {
-         if (binoptab->code != UNKNOWN)
-           equiv_value = gen_rtx_fmt_ee (binoptab->code, mode, op0, op1);
-         else
-           equiv_value = 0;
-
-         /* We can't make this a no conflict block if this is a word swap,
-            because the word swap case fails if the input and output values
-            are in the same register.  */
-         if (shift_count != BITS_PER_WORD)
-           emit_no_conflict_block (insns, target, op0, op1, equiv_value);
-         else
-           emit_insn (insns);
-
-
+         /* One may be tempted to wrap the insns in a REG_NO_CONFLICT
+            block to help the register allocator a bit.  But a multi-word
+            rotate will need all the input bits when setting the output
+            bits, so there clearly is a conflict between the input and
+            output registers.  So we can't use a no-conflict block here.  */
+         emit_insn (insns);
           return target;
         }
      }
@@ -2570,7 +2723,7 @@ expand_unop (enum machine_mode mode, optab unoptab, rtx op0, rtx target,
  
        target = gen_reg_rtx (outmode);
        emit_libcall_block (insns, target, value,
-                         gen_rtx_fmt_e (unoptab->code, mode, op0));
+                         gen_rtx_fmt_e (unoptab->code, outmode, op0));
  
        return target;
      }
@@ -2747,15 +2900,8 @@ expand_abs (enum machine_mode mode, rtx op0, rtx target,
    emit_move_insn (target, op0);
    NO_DEFER_POP;
  
-  /* If this mode is an integer too wide to compare properly,
-     compare word by word.  Rely on CSE to optimize constant cases.  */
-  if (GET_MODE_CLASS (mode) == MODE_INT
-      && ! can_compare_p (GE, mode, ccp_jump))
-    do_jump_by_parts_greater_rtx (mode, 0, target, const0_rtx,
-                                 NULL_RTX, op1);
-  else
-    do_compare_rtx_and_jump (target, CONST0_RTX (mode), GE, 0, mode,
-                            NULL_RTX, NULL_RTX, op1);
+  do_compare_rtx_and_jump (target, CONST0_RTX (mode), GE, 0, mode,
+                          NULL_RTX, NULL_RTX, op1);
  
    op0 = expand_unop (mode, result_unsignedp ? neg_optab : negv_optab,
                       target, target, 0);
@@ -3074,6 +3220,38 @@ no_conflict_move_test (rtx dest, rtx set, void *p0)
      p->must_stay = true;
  }
  
+/* Encapsulate the block from FIRST to LAST, which is logically equivalent
+   to EQUIV, so it gets manipulated as a unit.  Do nothing if EQUIV may trap
+   and flag_non_call_exceptions is set, or if the block has control flow.  */
+
+static void
+maybe_encapsulate_block (rtx first, rtx last, rtx equiv)
+{
+  if (!flag_non_call_exceptions || !may_trap_p (equiv))
+    {
+      /* We can't attach the REG_LIBCALL and REG_RETVAL notes when the
+        encapsulated region would not be in one basic block, i.e. when
+        there is a control_flow_insn_p insn between FIRST and LAST.  */
+      bool attach_libcall_retval_notes = true;
+      rtx insn, next = NEXT_INSN (last);
+
+      for (insn = first; insn != next; insn = NEXT_INSN (insn))
+       if (control_flow_insn_p (insn))
+         {
+           attach_libcall_retval_notes = false;
+           break;
+         }
+
+      if (attach_libcall_retval_notes)  /* Cross-link FIRST and LAST.  */
+       {
+         REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last,
+                                                REG_NOTES (first));
+         REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first,
+                                               REG_NOTES (last));
+       }
+    }
+}
+
  /* Emit code to perform a series of operations on a multi-word quantity, one
     word at a time.
  
@@ -3195,10 +3373,7 @@ emit_no_conflict_block (rtx insns, rtx target, rtx op0, rtx op1, rtx equiv)
    else
      first = NEXT_INSN (prev);
  
-  /* Encapsulate the block so it gets manipulated as a unit.  */
-  REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last,
-                                        REG_NOTES (first));
-  REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last));
+  maybe_encapsulate_block (first, last, equiv);
  
    return last;
  }
@@ -3352,30 +3527,7 @@ emit_libcall_block (rtx insns, rtx target, rtx result, rtx equiv)
    else
      first = NEXT_INSN (prev);
  
-  /* Encapsulate the block so it gets manipulated as a unit.  */
-  if (!flag_non_call_exceptions || !may_trap_p (equiv))
-    {
-      /* We can't attach the REG_LIBCALL and REG_RETVAL notes
-        when the encapsulated region would not be in one basic block,
-        i.e. when there is a control_flow_insn_p insn between FIRST and LAST.
-       */
-      bool attach_libcall_retval_notes = true;
-      next = NEXT_INSN (last);
-      for (insn = first; insn != next; insn = NEXT_INSN (insn))
-       if (control_flow_insn_p (insn))
-         {
-           attach_libcall_retval_notes = false;
-           break;
-         }
-
-      if (attach_libcall_retval_notes)
-       {
-         REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last,
-                                                REG_NOTES (first));
-         REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first,
-                                               REG_NOTES (last));
-       }
-    }
+  maybe_encapsulate_block (first, last, equiv);
  }
  \f
  /* Nonzero if we can perform a comparison of mode MODE straightforwardly.
@@ -3560,18 +3712,24 @@ prepare_cmp_insn (rtx *px, rtx *py, enum rtx_code *pcomparison, rtx size,
        result = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST_MAKE_BLOCK,
                                         word_mode, 2, x, mode, y, mode);
  
+      /* There are two kinds of comparison routines.  Biased routines
+        return 0/1/2, and unbiased routines return -1/0/1.  Other parts
+        of GCC expect that the comparison operation is equivalent
+        to the modified comparison.  For signed comparisons, compare the
+        result against 1 in the biased case, and zero in the unbiased
+        case.  For unsigned comparisons, always compare against 1 after
+        biasing the unbiased result by adding 1.  This gives us a way to
+        represent LTU.  */
        *px = result;
        *pmode = word_mode;
-      if (TARGET_LIB_INT_CMP_BIASED)
-       /* Integer comparison returns a result that must be compared
-          against 1, so that even if we do an unsigned compare
-          afterward, there is still a value that can represent the
-          result "less than".  */
-       *py = const1_rtx;
-      else
+      *py = const1_rtx;
+
+      if (!TARGET_LIB_INT_CMP_BIASED)
         {
-         *py = const0_rtx;
-         *punsignedp = 1;
+         if (*punsignedp)
+           *px = plus_constant (result, 1);  
+         else
+           *py = const0_rtx;
         }
        return;
      }
@@ -4383,9 +4541,10 @@ expand_float (rtx to, rtx from, int unsignedp)
           }
        }
  
-  /* Unsigned integer, and no way to convert directly.
-     Convert as signed, then conditionally adjust the result.  */
-  if (unsignedp && can_do_signed)
+  /* Unsigned integer, and no way to convert directly.  For binary
+     floating point modes, convert as signed, then conditionally adjust
+     the result.  */
+  if (unsignedp && can_do_signed && !DECIMAL_FLOAT_MODE_P (GET_MODE (to)))
      {
        rtx label = gen_label_rtx ();
        rtx temp;
@@ -4590,7 +4749,7 @@ expand_fix (rtx to, rtx from, int unsignedp)
       This is not needed.  Consider, for instance conversion from SFmode
       into DImode.
  
-     The hot path trought the code is dealing with inputs smaller than 2^63
+     The hot path through the code is dealing with inputs smaller than 2^63
       and doing just the conversion, so there is no bits to lose.
  
       In the other path we know the value is positive in the range 2^63..2^64-1
@@ -4845,6 +5004,8 @@ static void
  init_floating_libfuncs (optab optable, const char *opname, int suffix)
  {
    init_libfuncs (optable, MIN_MODE_FLOAT, MAX_MODE_FLOAT, opname, suffix);
+  init_libfuncs (optable, MIN_MODE_DECIMAL_FLOAT, MAX_MODE_DECIMAL_FLOAT,
+                opname, suffix);
  }
  
  /* Initialize the libfunc fields of an entire group of entries of an
@@ -4979,7 +5140,7 @@ init_one_libfunc (const char *name)
  
    /* Zap the nonsensical SYMBOL_REF_DECL for this.  What we're left with
       are the flags assigned by targetm.encode_section_info.  */
-  SYMBOL_REF_DECL (symbol) = 0;
+  SET_SYMBOL_REF_DECL (symbol, 0);
  
    return symbol;
  }
@@ -5139,6 +5300,11 @@ init_optabs (void)
    reduc_splus_optab = init_optab (UNKNOWN);
    reduc_uplus_optab = init_optab (UNKNOWN);
  
+  ssum_widen_optab = init_optab (UNKNOWN);
+  usum_widen_optab = init_optab (UNKNOWN);
+  sdot_prod_optab = init_optab (UNKNOWN); 
+  udot_prod_optab = init_optab (UNKNOWN);
+
    vec_extract_optab = init_optab (UNKNOWN);
    vec_set_optab = init_optab (UNKNOWN);
    vec_init_optab = init_optab (UNKNOWN);
@@ -5262,16 +5428,32 @@ init_optabs (void)
    /* Conversions.  */
    init_interclass_conv_libfuncs (sfloat_optab, "float",
                                  MODE_INT, MODE_FLOAT);
+  init_interclass_conv_libfuncs (sfloat_optab, "float",
+                                MODE_INT, MODE_DECIMAL_FLOAT);
    init_interclass_conv_libfuncs (ufloat_optab, "floatun",
                                  MODE_INT, MODE_FLOAT);
+  init_interclass_conv_libfuncs (ufloat_optab, "floatun",
+                                MODE_INT, MODE_DECIMAL_FLOAT);
    init_interclass_conv_libfuncs (sfix_optab, "fix",
                                  MODE_FLOAT, MODE_INT);
+  init_interclass_conv_libfuncs (sfix_optab, "fix",
+                                MODE_DECIMAL_FLOAT, MODE_INT);
    init_interclass_conv_libfuncs (ufix_optab, "fixuns",
                                  MODE_FLOAT, MODE_INT);
+  init_interclass_conv_libfuncs (ufix_optab, "fixuns",
+                                MODE_DECIMAL_FLOAT, MODE_INT);
+  init_interclass_conv_libfuncs (ufloat_optab, "floatuns",
+                                MODE_INT, MODE_DECIMAL_FLOAT);
  
    /* sext_optab is also used for FLOAT_EXTEND.  */
    init_intraclass_conv_libfuncs (sext_optab, "extend", MODE_FLOAT, true);
+  init_intraclass_conv_libfuncs (sext_optab, "extend", MODE_DECIMAL_FLOAT, true);
+  init_interclass_conv_libfuncs (sext_optab, "extend", MODE_FLOAT, MODE_DECIMAL_FLOAT);
+  init_interclass_conv_libfuncs (sext_optab, "extend", MODE_DECIMAL_FLOAT, MODE_FLOAT);
    init_intraclass_conv_libfuncs (trunc_optab, "trunc", MODE_FLOAT, false);
+  init_intraclass_conv_libfuncs (trunc_optab, "trunc", MODE_DECIMAL_FLOAT, false);
+  init_interclass_conv_libfuncs (trunc_optab, "trunc", MODE_FLOAT, MODE_DECIMAL_FLOAT);
+  init_interclass_conv_libfuncs (trunc_optab, "trunc", MODE_DECIMAL_FLOAT, MODE_FLOAT);
  
    /* Use cabs for double complex abs, since systems generally have cabs.
       Don't define any libcall for float complex, so that cabs will be used.  */