def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
emit_insn (pat);
return target;
+ case IX86_BUILTIN_PSLLDQI128:
+ case IX86_BUILTIN_PSRLDQI128:
+ icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
+ : CODE_FOR_sse2_lshrti3);
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ {
+ op0 = copy_to_reg (op0);
+ op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
+ }
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ {
+ error ("shift must be an immediate");
+ return const0_rtx;
+ }
+ target = gen_reg_rtx (V2DImode);
+ pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
case IX86_BUILTIN_FEMMS:
emit_insn (gen_femms ());
return NULL_RTX;
return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
}
+#if 0
+static __m128i __attribute__((__always_inline__))
+_mm_srli_si128 (__m128i __A, const int __B)
+{
+ return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B))
+}
+
+static __m128i __attribute__((__always_inline__))
+_mm_srli_si128 (__m128i __A, const int __B)
+{
+ return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B))
+}
+#endif
+#define _mm_srli_si128(__A, __B) ((__m128i)__builtin_ia32_psrldqi128 (__A, __B))
+#define _mm_slli_si128(__A, __B) ((__m128i)__builtin_ia32_pslldqi128 (__A, __B))
+
static __inline __m128i
_mm_srli_epi16 (__m128i __A, int __B)
{
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O2 -msse" } */
+/* { dg-options "-O2 -msse -msse2" } */
/* Test that the intrinsics compile with optimization. These were not
tested in i386-sse-[12].c because these builtins require immediate
_mm_prefetch (p+8, _MM_HINT_T2);
_mm_prefetch (p+12, _MM_HINT_NTA);
}
+
+__m128i
+test__slli_si128 (__m128i a)
+{
+ return _mm_slli_si128 (a, 3);
+}
+
+__m128i
+test__srli_si128 (__m128i a)
+{
+ return _mm_srli_si128 (a, 3);
+}