OSDN Git Service

Add support for 3Dnow builtins
authorbernds <bernds@138bc75d-0d04-0410-961f-82ee72b054a4>
Fri, 28 Sep 2001 18:00:35 +0000 (18:00 +0000)
committerbernds <bernds@138bc75d-0d04-0410-961f-82ee72b054a4>
Fri, 28 Sep 2001 18:00:35 +0000 (18:00 +0000)
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@45863 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/c-common.c
gcc/config/i386/i386.c
gcc/config/i386/i386.h
gcc/config/i386/i386.md
gcc/tree.c
gcc/tree.h

index cfedd18..aaaaa30 100644 (file)
@@ -1,3 +1,34 @@
+2001-09-25  Bernd Schmidt  <bernds@redhat.com>
+
+       Mostly from Graham Stott  <grahams@redhat.com>
+       * c-common.c (type_for_mode): Add support for V2SFmode.
+       * tree.c (build_common_tree_nodes_2): Likewise.
+       * tree.h (enum tree_index, global_trees): Likewise.
+       * config/i386/i386.c (x86_3dnow_a): New variable.
+       (override_options): Support 3Dnow extensions.
+       (bdesc_2arg, bdesc_1arg): Some SSE instructions are also part of
+       Athlon's version of 3Dnow.
+       (ix86_init_mmx_sse_builtins): Create 3Dnow builtins.
+       (ix86_expand_builtin): Handle them.
+       (ix86_hard_regno_mode_ok): Support V2SFmode if using 3Dnow.
+       * config/i386/i386.h (MASK_3DNOW, MASK_3DNOW_A, TARGET_3DNOW,
+       TARGET_3DNOW_A): New macros.
+       (TARGET_SWITCHES): Add 3Dnow switches.
+       (VALID_MMX_REG_MODE_3DNOW): New macro.
+       (VECTOR_MODE_SUPPORTED_P): Use it.
+       (enum ix86_builtins): Add entries for 3Dnow builtins.
+       * config/i386/i386.md (movv2sf_internal, movv2sf, pushv2sf, pf2id,
+       pf2iw, addv2sf3, subv2sf3, subrv2sf3, gtv2sf3, gev2sf3, eqv2sf3,
+       pfmaxv2sf3, pfminv2sf3, mulv2sf3, femms, prefetch_3dnow, prefetchw,
+       pfacc, pfnacc, pfpnacc, pi2fw, floatv2si2, pavgusb, pfrcpv2sf2,
+       pfrcpit1v2sf3, pfrcpit2v2sf3, pfrsqrtv2sf2, pfrsqit1v2sf3,
+       pmulhrwv4hi3, pswapdv2si2, pswapdv2sf2): New patterns.
+       (mmx_pmovmskb, mmx_maskmovq, sse_movntdi, umulv4hi3_highpart,
+       mmx_uavgv8qi3, mmx_uavgv4hi3, mmx_psadbw, mmx_pinsrw, mmx_pextrw,
+       mmx_pshufw, umaxv8qi3, smaxv4hi3, uminv8qi3, sminv4hi3, sfence,
+       sfence_insn, prefetch): Make these available if TARGET_SSE or
+       TARGET_3DNOW_A.
+       
 Fri Sep 28 19:18:40 CEST 2001  Jan Hubicka  <jh@suse.cz>
 
        * i386-protos.h (ix86_setup_incoming_varargs, ix86_va_arg,
index 74a5552..190c338 100644 (file)
@@ -1347,6 +1347,8 @@ type_for_mode (mode, unsignedp)
     return V4HI_type_node;
   if (mode == TYPE_MODE (V8QI_type_node) && VECTOR_MODE_SUPPORTED_P (mode))
     return V8QI_type_node;
+  if (mode == TYPE_MODE (V2SF_type_node) && VECTOR_MODE_SUPPORTED_P (mode))
+    return V2SF_type_node;
 #endif
 
   return 0;
index 889c428..f8b65da 100644 (file)
@@ -323,6 +323,7 @@ const int x86_double_with_add = ~m_386;
 const int x86_use_bit_test = m_386;
 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
+const int x86_3dnow_a = m_ATHLON;
 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
 const int x86_branch_hints = m_PENT4;
 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
@@ -988,6 +989,15 @@ override_options ()
   if (TARGET_SSE)
     target_flags |= MASK_MMX;
 
+  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow.  */
+  if (TARGET_3DNOW)
+    {
+      target_flags |= MASK_MMX;
+      /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
+        extensions it adds.  */
+      if (x86_3dnow_a & (1 << ix86_arch))
+       target_flags |= MASK_3DNOW_A;
+    }
   if ((x86_accumulate_outgoing_args & CPUMASK)
       && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
       && !optimize_size)
@@ -10731,15 +10741,15 @@ static struct builtin_description bdesc_2arg[] =
 
   { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
   { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
-  { MASK_SSE, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
 
   { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
   { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
   { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
   { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
 
-  { MASK_SSE, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
-  { MASK_SSE, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
 
   { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
   { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
@@ -10748,10 +10758,10 @@ static struct builtin_description bdesc_2arg[] =
   { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
   { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
 
-  { MASK_SSE, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
-  { MASK_SSE, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
-  { MASK_SSE, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
-  { MASK_SSE, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
 
   { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
   { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
@@ -10794,7 +10804,7 @@ static struct builtin_description bdesc_2arg[] =
 
 static struct builtin_description bdesc_1arg[] =
 {
-  { MASK_SSE, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
+  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
   { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
 
   { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
@@ -11034,6 +11044,40 @@ ix86_init_mmx_sse_builtins ()
                                                 long_long_unsigned_type_node,
                                                 endlink)));
 
+  tree v2si_ftype_v2sf
+    = build_function_type (V2SI_type_node,
+                           tree_cons (NULL_TREE, V2SF_type_node,
+                                      endlink));
+  tree v2sf_ftype_v2si
+    = build_function_type (V2SF_type_node,
+                           tree_cons (NULL_TREE, V2SI_type_node,
+                                      endlink));
+  tree v2si_ftype_v2si
+    = build_function_type (V2SI_type_node,
+                           tree_cons (NULL_TREE, V2SI_type_node,
+                                      endlink));
+  tree v2sf_ftype_v2sf
+    = build_function_type (V2SF_type_node,
+                           tree_cons (NULL_TREE, V2SF_type_node,
+                                      endlink));
+  tree v2sf_ftype_v2sf_v2sf
+    = build_function_type (V2SF_type_node,
+                           tree_cons (NULL_TREE, V2SF_type_node,
+                                      tree_cons (NULL_TREE,
+                                                 V2SF_type_node,
+                                                 endlink)));
+  tree v2si_ftype_v2sf_v2sf
+    = build_function_type (V2SI_type_node,
+                           tree_cons (NULL_TREE, V2SF_type_node,
+                                      tree_cons (NULL_TREE,
+                                                 V2SF_type_node,
+                                                 endlink)));
+
+  tree void_ftype_pchar
+    = build_function_type (void_type_node,
+                           tree_cons (NULL_TREE, pchar_type_node,
+                                      endlink));
+
   /* Add all builtins that are more or less simple operations on two
      operands.  */
   for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
@@ -11047,9 +11091,6 @@ ix86_init_mmx_sse_builtins ()
        continue;
       mode = insn_data[d->icode].operand[1].mode;
 
-      if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
-       continue;
-
       switch (mode)
        {
        case V4SFmode:
@@ -11121,10 +11162,10 @@ ix86_init_mmx_sse_builtins ()
   def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
   def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
 
-  def_builtin (MASK_SSE, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
-  def_builtin (MASK_SSE, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
 
-  def_builtin (MASK_SSE, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
 
   def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
   def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
@@ -11139,14 +11180,14 @@ ix86_init_mmx_sse_builtins ()
   def_builtin (MASK_SSE, "__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
 
   def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
-  def_builtin (MASK_SSE, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
   def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
-  def_builtin (MASK_SSE, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
 
-  def_builtin (MASK_SSE, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
-  def_builtin (MASK_SSE, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
 
-  def_builtin (MASK_SSE, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
+  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
 
   def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
   def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
@@ -11157,6 +11198,38 @@ ix86_init_mmx_sse_builtins ()
 
   def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
 
+  /* Original 3DNow!  */
+  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
+  def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);
+
+  /* 3DNow! extension as used in the Athlon CPU.  */
+  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
+  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
+  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
+  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
+  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
+  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
+
   /* Composite intrinsics.  */
   def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
   def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
@@ -11179,7 +11252,7 @@ safe_vector_operand (x, mode)
     return x;
   x = gen_reg_rtx (mode);
 
-  if (VALID_MMX_REG_MODE (mode))
+  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
     emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
   else
@@ -11739,6 +11812,107 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
       emit_insn (pat);
       return target;
 
+    case IX86_BUILTIN_FEMMS:
+      emit_insn (gen_femms ());
+      return NULL_RTX;
+
+    case IX86_BUILTIN_PAVGUSB:
+      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
+
+    case IX86_BUILTIN_PF2ID:
+      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
+
+    case IX86_BUILTIN_PFACC:
+      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
+
+    case IX86_BUILTIN_PFADD:
+     return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFCMPEQ:
+      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFCMPGE:
+      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFCMPGT:
+      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFMAX:
+      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFMIN:
+      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFMUL:
+      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFRCP:
+      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
+
+    case IX86_BUILTIN_PFRCPIT1:
+      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFRCPIT2:
+      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFRSQIT1:
+      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFRSQRT:
+      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
+
+    case IX86_BUILTIN_PFSUB:
+      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PFSUBR:
+      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
+
+    case IX86_BUILTIN_PI2FD:
+      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
+
+    case IX86_BUILTIN_PMULHRW:
+      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
+
+    case IX86_BUILTIN_PREFETCH_3DNOW:
+      icode = CODE_FOR_prefetch_3dnow;
+      arg0 = TREE_VALUE (arglist);
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      mode0 = insn_data[icode].operand[0].mode;
+      pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
+      if (! pat)
+        return NULL_RTX;
+      emit_insn (pat);
+      return NULL_RTX;
+
+    case IX86_BUILTIN_PREFETCHW:
+      icode = CODE_FOR_prefetchw;
+      arg0 = TREE_VALUE (arglist);
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      mode0 = insn_data[icode].operand[0].mode;
+      pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
+      if (! pat)
+        return NULL_RTX;
+      emit_insn (pat);
+      return NULL_RTX;
+
+    case IX86_BUILTIN_PF2IW:
+      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
+
+    case IX86_BUILTIN_PFNACC:
+      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
+
+    case IX86_BUILTIN_PFPNACC:
+      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
+
+    case IX86_BUILTIN_PI2FW:
+      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
+
+    case IX86_BUILTIN_PSWAPDSI:
+      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
+
+    case IX86_BUILTIN_PSWAPDSF:
+      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
+
       /* Composite intrinsics.  */
     case IX86_BUILTIN_SETPS1:
       target = assign_386_stack_local (SFmode, 0);
@@ -12055,7 +12229,7 @@ ix86_hard_regno_mode_ok (regno, mode)
   if (SSE_REGNO_P (regno))
     return VALID_SSE_REG_MODE (mode);
   if (MMX_REGNO_P (regno))
-    return VALID_MMX_REG_MODE (mode);
+    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
   /* We handle both integer and floats in the general purpose registers.
      In future we should be able to handle vector modes as well.  */
   if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
index 654d8fb..fc6f1c9 100644 (file)
@@ -118,10 +118,12 @@ extern int target_flags;
 #define MASK_MMX               0x00020000      /* Support MMX regs/builtins */
 #define MASK_SSE               0x00040000      /* Support SSE regs/builtins */
 #define MASK_SSE2              0x00080000      /* Support SSE2 regs/builtins */
-#define MASK_128BIT_LONG_DOUBLE 0x00100000     /* long double size is 128bit */
-#define MASK_MIX_SSE_I387      0x00200000      /* Mix SSE and i387 instructions */
-#define MASK_64BIT             0x00400000      /* Produce 64bit code */
-#define MASK_NO_RED_ZONE       0x00800000      /* Do not use red zone */
+#define MASK_3DNOW             0x00100000      /* Support 3Dnow builtins */
+#define MASK_3DNOW_A           0x00200000      /* Support Athlon 3Dnow builtins */
+#define MASK_128BIT_LONG_DOUBLE 0x00400000     /* long double size is 128bit */
+#define MASK_MIX_SSE_I387      0x00800000      /* Mix SSE and i387 instructions */
+#define MASK_64BIT             0x01000000      /* Produce 64bit code */
+#define MASK_NO_RED_ZONE       0x02000000      /* Do not use red zone */
 
 /* Temporary codegen switches */
 #define MASK_INTEL_SYNTAX      0x00000200
@@ -264,6 +266,8 @@ extern const int x86_epilogue_using_move;
 #define TARGET_SSE2 ((target_flags & MASK_SSE2) != 0)
 #define TARGET_MIX_SSE_I387 ((target_flags & MASK_MIX_SSE_I387) != 0)
 #define TARGET_MMX ((target_flags & MASK_MMX) != 0)
+#define TARGET_3DNOW ((target_flags & MASK_3DNOW) != 0)
+#define TARGET_3DNOW_A ((target_flags & MASK_3DNOW_A) != 0)
 
 #define TARGET_RED_ZONE (!(target_flags & MASK_NO_RED_ZONE))
 
@@ -335,6 +339,10 @@ extern const int x86_epilogue_using_move;
   { "mmx",                      MASK_MMX, N_("Support MMX builtins") },      \
   { "no-mmx",                  -MASK_MMX,                                    \
     N_("Do not support MMX builtins") },                                     \
+  { "3dnow",                     MASK_3DNOW,                                 \
+    N_("Support 3DNow! builtins") },                                         \
+  { "no-3dnow",                 -MASK_3DNOW,                                 \
+    N_("Do not support 3DNow! builtins") },                                  \
   { "sse",                      MASK_SSE,                                    \
     N_("Support MMX and SSE builtins and code generation") },                \
   { "no-sse",                  -MASK_SSE,                                    \
@@ -918,13 +926,17 @@ extern int ix86_arch;
      || (MODE) == SFmode \
      || (TARGET_SSE2 && ((MODE) == DFmode || VALID_MMX_REG_MODE (MODE))))
 
+#define VALID_MMX_REG_MODE_3DNOW(MODE) \
+    ((MODE) == V2SFmode || (MODE) == SFmode)
+
 #define VALID_MMX_REG_MODE(MODE) \
     ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode \
      || (MODE) == V2SImode || (MODE) == SImode)
 
 #define VECTOR_MODE_SUPPORTED_P(MODE)                                  \
     (VALID_SSE_REG_MODE (MODE) && TARGET_SSE ? 1                       \
-     : VALID_MMX_REG_MODE (MODE) && TARGET_MMX ? 1 : 0)
+     : VALID_MMX_REG_MODE (MODE) && TARGET_MMX ? 1                     \
+     : VALID_MMX_REG_MODE_3DNOW (MODE) && TARGET_3DNOW ? 1 : 0)
 
 #define VALID_FP_MODE_P(mode) \
     ((mode) == SFmode || (mode) == DFmode || (mode) == TFmode  \
@@ -2204,6 +2216,38 @@ enum ix86_builtins
   IX86_BUILTIN_SFENCE,
   IX86_BUILTIN_PREFETCH,
 
+  /* 3DNow! Original */
+  IX86_BUILTIN_FEMMS,
+  IX86_BUILTIN_PAVGUSB,
+  IX86_BUILTIN_PF2ID,
+  IX86_BUILTIN_PFACC,
+  IX86_BUILTIN_PFADD,
+  IX86_BUILTIN_PFCMPEQ,
+  IX86_BUILTIN_PFCMPGE,
+  IX86_BUILTIN_PFCMPGT,
+  IX86_BUILTIN_PFMAX,
+  IX86_BUILTIN_PFMIN,
+  IX86_BUILTIN_PFMUL,
+  IX86_BUILTIN_PFRCP,
+  IX86_BUILTIN_PFRCPIT1,
+  IX86_BUILTIN_PFRCPIT2,
+  IX86_BUILTIN_PFRSQIT1,
+  IX86_BUILTIN_PFRSQRT,
+  IX86_BUILTIN_PFSUB,
+  IX86_BUILTIN_PFSUBR,
+  IX86_BUILTIN_PI2FD,
+  IX86_BUILTIN_PMULHRW,
+  IX86_BUILTIN_PREFETCH_3DNOW, /* PREFETCH already used */
+  IX86_BUILTIN_PREFETCHW,
+
+  /* 3DNow! Athlon Extensions */
+  IX86_BUILTIN_PF2IW,
+  IX86_BUILTIN_PFNACC,
+  IX86_BUILTIN_PFPNACC,
+  IX86_BUILTIN_PI2FW,
+  IX86_BUILTIN_PSWAPDSI,
+  IX86_BUILTIN_PSWAPDSF,
+
   /* Composite builtins, expand to more than one insn.  */
   IX86_BUILTIN_SETPS1,
   IX86_BUILTIN_SETPS,
index 5119db6..3b98788 100644 (file)
 ;; 43 This is a `rsqsrt' operation.
 ;; 44 This is a `sfence' operation.
 ;; 45 This is a noop to prevent excessive combiner cleverness.
+;; 46 This is a `femms' operation.
+;; 47 This is a `prefetch' (3DNow) operation.
+;; 48 This is a `prefetchw' operation.
+;; 49 This is a `pavgusb' operation.
+;; 50 This is a `pfrcp' operation.
+;; 51 This is a `pfrcpit1' operation.
+;; 52 This is a `pfrcpit2' operation.
+;; 53 This is a `pfrsqrt' operation.
+;; 54 This is a `pfrsqit1' operation.
 
 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
 ;; from i386.c.
   "movq\t{%1, %0|%0, %1}"
   [(set_attr "type" "mmx")])
 
+(define_insn "movv2sf_internal"
+  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
+        (match_operand:V2SF 1 "general_operand" "ym,y"))]
+  "TARGET_3DNOW"
+  "movq\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
 (define_expand "movti"
   [(set (match_operand:TI 0 "general_operand" "")
        (match_operand:TI 1 "general_operand" ""))]
     }
 })
 
+(define_expand "movv2sf"
+  [(set (match_operand:V2SF 0 "general_operand" "")
+       (match_operand:V2SF 1 "general_operand" ""))]
+   "TARGET_3DNOW"
+   "
+{
+  /* For constants other than zero into memory.  We do not know how the
+     instructions used to build constants modify the upper 64 bits
+     of the register, once we have that information we may be able
+     to handle some of them more efficiently.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && register_operand (operands[0], V2SFmode)
+      && CONSTANT_P (operands[1]))
+    {
+      rtx addr = gen_reg_rtx (Pmode);
+
+      emit_move_insn (addr,
+                     XEXP (force_const_mem (V2SFmode, operands[1]), 0));
+      operands[1] = gen_rtx_MEM (V2SFmode, addr);
+   }
+
+  /* Make operand1 a register if it isn't already.  */
+  if ((reload_in_progress | reload_completed) == 0
+      && !register_operand (operands[0], V2SFmode)
+      && !register_operand (operands[1], V2SFmode)
+      && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
+      && operands[1] != CONST0_RTX (V2SFmode))
+   {
+      rtx temp = force_reg (V2SFmode, operands[1]);
+      emit_move_insn (operands[0], temp);
+      DONE;
+    }
+}")
+
 (define_insn_and_split "*pushti"
   [(set (match_operand:TI 0 "push_operand" "=<")
        (match_operand:TI 1 "nonmemory_operand" "x"))]
   ""
   [(set_attr "type" "mmx")])
 
+(define_insn_and_split "*pushv2sf"
+  [(set (match_operand:V2SF 0 "push_operand" "=<")
+       (match_operand:V2SF 1 "nonmemory_operand" "y"))]
+  "TARGET_3DNOW"
+  "#"
+  ""
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+   (set (mem:V2SF (reg:SI 7)) (match_dup 1))]
+  ""
+  [(set_attr "type" "mmx")])
+
 (define_insn "movti_internal"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
        (match_operand:TI 1 "general_operand" "xm,x"))]
 (define_insn "mmx_pmovmskb"
   [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] 33))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pmovmskb\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")])
 
   [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D"))
        (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
                      (match_operand:V8QI 2 "register_operand" "y")] 32))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   ;; @@@ check ordering of operands in intel/nonintel syntax
   "maskmovq\t{%2, %1|%1, %2}"
   [(set_attr "type" "sse")])
 (define_insn "sse_movntdi"
   [(set (match_operand:DI 0 "memory_operand" "=m")
        (unspec:DI [(match_operand:DI 1 "register_operand" "y")] 34))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "movntq\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")])
 
          (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
                     (zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
          (const_int 16))))]
-  "TARGET_MMX"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pmulhuw\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmx")])
 
                                               (const_int 1)
                                               (const_int 1)])))
         (const_int 1)))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pavgb\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
                                               (const_int 1)
                                               (const_int 1)])))
         (const_int 1)))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pavgw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
   [(set (match_operand:V8QI 0 "register_operand" "=y")
         (abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
                              (match_operand:V8QI 2 "nonimmediate_operand" "ym"))))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "psadbw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
                        (vec_duplicate:V4HI
                         (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "rm")))
                        (match_operand:SI 3 "immediate_operand" "i")))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pinsrw\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "sse")])
 
         (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
                                       (parallel
                                        [(match_operand:SI 2 "immediate_operand" "i")]))))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pextrw\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sse")])
 
         (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "0")
                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                      (match_operand:SI 3 "immediate_operand" "i")] 41))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pshufw\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "sse")])
 
   [(set (match_operand:V8QI 0 "register_operand" "=y")
         (umax:V8QI (match_operand:V8QI 1 "register_operand" "0")
                   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pmaxub\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
   [(set (match_operand:V4HI 0 "register_operand" "=y")
         (smax:V4HI (match_operand:V4HI 1 "register_operand" "0")
                   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pmaxsw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
   [(set (match_operand:V8QI 0 "register_operand" "=y")
         (umin:V8QI (match_operand:V8QI 1 "register_operand" "0")
                   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pminub\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
   [(set (match_operand:V4HI 0 "register_operand" "=y")
         (smin:V4HI (match_operand:V4HI 1 "register_operand" "0")
                   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "pminsw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
 (define_expand "sfence"
   [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] 44))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
 {
   operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
   MEM_VOLATILE_P (operands[0]) = 1;
 (define_insn "*sfence_insn"
   [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] 44))]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
   "sfence"
   [(set_attr "type" "sse")
    (set_attr "memory" "unknown")])
 (define_insn "prefetch"
   [(unspec [(match_operand:SI 0 "address_operand" "p")
            (match_operand:SI 1 "immediate_operand" "n")] 35)]
-  "TARGET_SSE"
+  "TARGET_SSE || TARGET_3DNOW_A"
 {
   switch (INTVAL (operands[1]))
     {
    (set_attr "memory" "store")
    (set_attr "modrm" "0")
    (set_attr "mode" "DI")])
+
+;; 3Dnow! instructions
+
+;; addv2sf3: V2SF addition (plus:V2SF), emitted as pfadd.  Operand 1 is
+;; tied to the destination by the "0" constraint; operand 2 may be a
+;; "y"-constrained register or memory.  Enabled under TARGET_3DNOW.
+(define_insn "addv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (plus:V2SF (match_operand:V2SF 1 "register_operand" "0")
+                  (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfadd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; subv2sf3: V2SF subtraction, operand 1 minus operand 2, emitted as pfsub.
+;; Operand 1 is tied to the destination ("0"); operand 2 may be a
+;; "y"-constrained register or memory.  Enabled under TARGET_3DNOW.
+(define_insn "subv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (minus:V2SF (match_operand:V2SF 1 "register_operand" "0")
+                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfsub\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; subrv2sf3: reversed V2SF subtraction, operand 2 minus operand 1, emitted
+;; as pfsubr.  Note the operand order inside the minus: operand 2 is the
+;; minuend while operand 1 (tied to the destination) is the subtrahend.
+;; Enabled under TARGET_3DNOW.
+(define_insn "subrv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+                    (match_operand:V2SF 1 "register_operand" "0")))]
+  "TARGET_3DNOW"
+  "pfsubr\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; gtv2sf3: greater-than comparison of two V2SF operands producing a V2SI
+;; result, emitted as pfcmpgt.  Operand 1 is tied to the destination ("0");
+;; operand 2 may be a "y"-constrained register or memory.  Enabled under
+;; TARGET_3DNOW.
+(define_insn "gtv2sf3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
+                (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+  "pfcmpgt\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; gev2sf3: greater-or-equal comparison of two V2SF operands producing a
+;; V2SI result, emitted as pfcmpge.  Operand 1 is tied to the destination
+;; ("0"); operand 2 may be a "y"-constrained register or memory.  Enabled
+;; under TARGET_3DNOW.
+(define_insn "gev2sf3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (ge:V2SI (match_operand:V2SF 1 "register_operand" "0")
+                (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfcmpge\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; eqv2sf3: equality comparison of two V2SF operands producing a V2SI
+;; result, emitted as pfcmpeq.  Operand 1 is tied to the destination ("0");
+;; operand 2 may be a "y"-constrained register or memory.  Enabled under
+;; TARGET_3DNOW.
+(define_insn "eqv2sf3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (eq:V2SI (match_operand:V2SF 1 "register_operand" "0")
+                (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfcmpeq\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pfmaxv2sf3: V2SF maximum (smax:V2SF), emitted as pfmax.  Operand 1 is
+;; tied to the destination ("0"); operand 2 may be a "y"-constrained
+;; register or memory.  Enabled under TARGET_3DNOW.
+(define_insn "pfmaxv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (smax:V2SF (match_operand:V2SF 1 "register_operand" "0")
+                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfmax\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pfminv2sf3: V2SF minimum (smin:V2SF), emitted as pfmin.  Operand 1 is
+;; tied to the destination ("0"); operand 2 may be a "y"-constrained
+;; register or memory.  Enabled under TARGET_3DNOW.
+(define_insn "pfminv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (smin:V2SF (match_operand:V2SF 1 "register_operand" "0")
+                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfmin\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; mulv2sf3: V2SF multiplication (mult:V2SF), emitted as pfmul.  Operand 1
+;; is tied to the destination ("0"); operand 2 may be a "y"-constrained
+;; register or memory.  Enabled under TARGET_3DNOW.
+(define_insn "mulv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (mult:V2SF (match_operand:V2SF 1 "register_operand" "0")
+                  (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfmul\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; femms: emits the "femms" instruction.  Modelled as unspec_volatile 46 so
+;; it is never deleted or moved, together with explicit clobbers of hard
+;; registers 8-15 (in XFmode) and 29-36 (in DImode).
+;; NOTE(review): those register numbers are presumably the x87 stack and
+;; the MMX registers -- confirm against the register numbering in i386.h.
+(define_insn "femms"
+  [(unspec_volatile [(const_int 0)] 46)
+   (clobber (reg:XF 8))
+   (clobber (reg:XF 9))
+   (clobber (reg:XF 10))
+   (clobber (reg:XF 11))
+   (clobber (reg:XF 12))
+   (clobber (reg:XF 13))
+   (clobber (reg:XF 14))
+   (clobber (reg:XF 15))
+   (clobber (reg:DI 29))
+   (clobber (reg:DI 30))
+   (clobber (reg:DI 31))
+   (clobber (reg:DI 32))
+   (clobber (reg:DI 33))
+   (clobber (reg:DI 34))
+   (clobber (reg:DI 35))
+   (clobber (reg:DI 36))]
+  "TARGET_3DNOW"
+  "femms"
+  [(set_attr "type" "mmx")])
+
+;; prefetch_3dnow: unspec 47 over a single address operand, emitted as
+;; "prefetch" with the operand printed as an address (%a0).  Enabled under
+;; TARGET_3DNOW.
+(define_insn "prefetch_3dnow"
+  [(unspec [(match_operand:SI 0 "address_operand" "p")] 47)]
+  "TARGET_3DNOW"
+  "prefetch\\t%a0"
+  [(set_attr "type" "mmx")])
+
+;; prefetchw: unspec 48 over a single address operand, emitted as
+;; "prefetchw" with the operand printed as an address (%a0).  Enabled under
+;; TARGET_3DNOW.
+(define_insn "prefetchw"
+  [(unspec [(match_operand:SI 0 "address_operand" "p")] 48)]
+  "TARGET_3DNOW"
+  "prefetchw\\t%a0"
+  [(set_attr "type" "mmx")])
+
+;; pf2id: converts a V2SF source (register or memory) to V2SI with fix,
+;; emitted as pf2id.  The source is not tied to the destination.  Enabled
+;; under TARGET_3DNOW.
+(define_insn "pf2id"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pf2id\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+;; pf2iw: converts a V2SF source to V2SI with fix, narrows to V2HI with
+;; signed saturation (ss_truncate), then sign-extends back to V2SI; emitted
+;; as pf2iw.  Enabled only under TARGET_3DNOW_A.
+(define_insn "pf2iw"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (sign_extend:V2SI
+          (ss_truncate:V2HI
+             (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
+  "TARGET_3DNOW_A"
+  "pf2iw\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+;; pfacc: horizontal add.  The result is the concatenation of
+;; (op1[0] + op1[1]) and (op2[0] + op2[1]); emitted as pfacc.  Operand 1 is
+;; tied to the destination ("0").  Operand 2 uses "ym" so that its
+;; constraint agrees with its nonimmediate_operand predicate and a memory
+;; source can be used directly, as in the other two-operand 3DNow!
+;; patterns.  Enabled under TARGET_3DNOW.
+(define_insn "pfacc"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (vec_concat:V2SF
+          (plus:SF
+             (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
+                            (parallel [(const_int  0)]))
+             (vec_select:SF (match_dup 1)
+                            (parallel [(const_int 1)])))
+           (plus:SF
+              (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+                            (parallel [(const_int  0)]))
+              (vec_select:SF (match_dup 2)
+                            (parallel [(const_int 1)])))))]
+  "TARGET_3DNOW"
+  "pfacc\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pfnacc: horizontal subtract.  The result is the concatenation of
+;; (op1[0] - op1[1]) and (op2[0] - op2[1]); emitted as pfnacc.  Operand 1 is
+;; tied to the destination ("0").  Operand 2 uses "ym" so that its
+;; constraint agrees with its nonimmediate_operand predicate and a memory
+;; source can be used directly.  Enabled only under TARGET_3DNOW_A.
+(define_insn "pfnacc"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (vec_concat:V2SF
+           (minus:SF
+              (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
+                            (parallel [(const_int 0)]))
+              (vec_select:SF (match_dup 1)
+                            (parallel [(const_int 1)])))
+           (minus:SF
+              (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+                            (parallel [(const_int  0)]))
+              (vec_select:SF (match_dup 2)
+                            (parallel [(const_int 1)])))))]
+  "TARGET_3DNOW_A"
+  "pfnacc\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pfpnacc: mixed horizontal subtract/add.  The result is the concatenation
+;; of (op1[0] - op1[1]) and (op2[0] + op2[1]); emitted as pfpnacc.  Operand
+;; 1 is tied to the destination ("0").  Operand 2 uses "ym" so that its
+;; constraint agrees with its nonimmediate_operand predicate and a memory
+;; source can be used directly.  Enabled only under TARGET_3DNOW_A.
+(define_insn "pfpnacc"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (vec_concat:V2SF
+           (minus:SF
+              (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
+                            (parallel [(const_int 0)]))
+              (vec_select:SF (match_dup 1)
+                            (parallel [(const_int 1)])))
+           (plus:SF
+              (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+                            (parallel [(const_int 0)]))
+              (vec_select:SF (match_dup 2)
+                            (parallel [(const_int 1)])))))]
+  "TARGET_3DNOW_A"
+  "pfpnacc\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pi2fw: each SI element of the V2SI source is truncated to HI and
+;; sign-extended back to SI, and the resulting pair is converted to V2SF
+;; with float; emitted as pi2fw.  The source may be a register or memory.
+;; Enabled only under TARGET_3DNOW_A.
+(define_insn "pi2fw"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (float:V2SF
+          (vec_concat:V2SI
+             (sign_extend:SI
+                (truncate:HI
+                   (vec_select:SI (match_operand:V2SI 1 "nonimmediate_operand" "ym")
+                                  (parallel [(const_int 0)]))))
+              (sign_extend:SI
+                (truncate:HI
+                    (vec_select:SI (match_dup 1)
+                                  (parallel [(const_int  1)])))))))]
+  "TARGET_3DNOW_A"
+  "pi2fw\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+;; floatv2si2: converts a V2SI source (register or memory) to V2SF with
+;; float, emitted as pi2fd.  Enabled under TARGET_3DNOW.
+(define_insn "floatv2si2"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pi2fd\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+;; This insn is identical to pavgb in operation, but the opcode is
+;; different.  To avoid accidentally matching pavgb, use an unspec.
+
+;; pavgusb: V8QI operation represented as unspec 49 of operands 1 and 2,
+;; emitted as pavgusb.  Operand 1 is tied to the destination ("0");
+;; operand 2 may be a "y"-constrained register or memory.  Enabled under
+;; TARGET_3DNOW.
+(define_insn "pavgusb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+       (unspec:V8QI
+          [(match_operand:V8QI 1 "register_operand" "0")
+           (match_operand:V8QI 2 "nonimmediate_operand" "ym")] 49))]
+  "TARGET_3DNOW"
+  "pavgusb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; 3DNow! reciprocal and reciprocal square root
+;; pfrcpv2sf2: one-input V2SF operation represented as unspec 50, emitted
+;; as pfrcp.  The source may be a register or memory and is not tied to the
+;; destination.  Enabled under TARGET_3DNOW.
+(define_insn "pfrcpv2sf2"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 50))]
+  "TARGET_3DNOW"
+  "pfrcp\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+;; pfrcpit1v2sf3: two-input V2SF operation represented as unspec 51,
+;; emitted as pfrcpit1.  Operand 1 is tied to the destination ("0");
+;; operand 2 may be a "y"-constrained register or memory.  Enabled under
+;; TARGET_3DNOW.
+(define_insn "pfrcpit1v2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+                     (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 51))]
+  "TARGET_3DNOW"
+  "pfrcpit1\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pfrcpit2v2sf3: two-input V2SF operation represented as unspec 52,
+;; emitted as pfrcpit2.  Operand 1 is tied to the destination ("0");
+;; operand 2 may be a "y"-constrained register or memory.  Enabled under
+;; TARGET_3DNOW.
+(define_insn "pfrcpit2v2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+                     (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 52))]
+  "TARGET_3DNOW"
+  "pfrcpit2\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pfrsqrtv2sf2: one-input V2SF operation represented as unspec 53, emitted
+;; as pfrsqrt.  The source may be a register or memory and is not tied to
+;; the destination.  Enabled under TARGET_3DNOW.
+(define_insn "pfrsqrtv2sf2"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 53))]
+  "TARGET_3DNOW"
+   "pfrsqrt\\t{%1, %0|%0, %1}"
+   [(set_attr "type" "mmx")])
+               
+;; pfrsqit1v2sf3: two-input V2SF operation represented as unspec 54,
+;; emitted as pfrsqit1.  Operand 1 is tied to the destination ("0");
+;; operand 2 may be a "y"-constrained register or memory.  Enabled under
+;; TARGET_3DNOW.
+(define_insn "pfrsqit1v2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+                     (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 54))]
+  "TARGET_3DNOW"
+  "pfrsqit1\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pmulhrwv4hi3: rounded high-part multiply, emitted as pmulhrw.  Both V4HI
+;; inputs are sign-extended to V4SI and multiplied, 0x8000 is added to each
+;; product as the rounding term, the sum is shifted right logically by 16,
+;; and the result is truncated back to V4HI.  Operand 1 is tied to the
+;; destination ("0").  Enabled under TARGET_3DNOW.
+(define_insn "pmulhrwv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+       (truncate:V4HI
+          (lshiftrt:V4SI
+             (plus:V4SI
+                (mult:V4SI
+                   (sign_extend:V4SI
+                      (match_operand:V4HI 1 "register_operand" "0"))
+                   (sign_extend:V4SI
+                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+             (vec_const:V4SI
+                (parallel [(const_int 0x8000)
+                           (const_int 0x8000)
+                           (const_int 0x8000)
+                           (const_int 0x8000)])))
+          (const_int 16))))]
+  "TARGET_3DNOW"
+  "pmulhrw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pswapdv2si2: swaps the two elements of a V2SI source by selecting
+;; element 1 then element 0; emitted as pswapd.  The source may be a
+;; register or memory.  Enabled only under TARGET_3DNOW_A.
+(define_insn "pswapdv2si2"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+       (vec_select:V2SI (match_operand:V2SI 1 "nonimmediate_operand" "ym")
+                        (parallel [(const_int 1) (const_int 0)])))]
+  "TARGET_3DNOW_A"
+  "pswapd\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+;; pswapdv2sf2: V2SF counterpart of the V2SI pswapd pattern.  Swaps the two
+;; elements of the source by selecting element 1 then element 0; emitted
+;; as pswapd.  The source may be a register or memory.  Enabled only under
+;; TARGET_3DNOW_A.
+(define_insn "pswapdv2sf2"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+       (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym")
+                        (parallel [(const_int 1) (const_int 0)])))]
+  "TARGET_3DNOW_A"
+  "pswapd\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
index 8a8d22d..62b0760 100644 (file)
@@ -4877,4 +4877,9 @@ build_common_tree_nodes_2 (short_double)
   TREE_TYPE (V8QI_type_node) = intQI_type_node;
   TYPE_MODE (V8QI_type_node) = V8QImode;
   finish_vector_type (V8QI_type_node);
+
+  V2SF_type_node = make_node (VECTOR_TYPE);
+  TREE_TYPE (V2SF_type_node) = float_type_node;
+  TYPE_MODE (V2SF_type_node) = V2SFmode;
+  finish_vector_type (V2SF_type_node);
 }
index b95a4aa..d4306a8 100644 (file)
@@ -1846,6 +1846,7 @@ enum tree_index
   TI_V8QI_TYPE,
   TI_V4HI_TYPE,
   TI_V2SI_TYPE,
+  TI_V2SF_TYPE,
 
   TI_MAIN_IDENTIFIER,
 
@@ -1911,6 +1912,7 @@ extern tree global_trees[TI_MAX];
 #define V8QI_type_node                 global_trees[TI_V8QI_TYPE]
 #define V4HI_type_node                 global_trees[TI_V4HI_TYPE]
 #define V2SI_type_node                 global_trees[TI_V2SI_TYPE]
+#define V2SF_type_node                 global_trees[TI_V2SF_TYPE]
 
 /* An enumeration of the standard C integer types.  These must be
    ordered so that shorter types appear before longer ones.  */