static __inline __m128i __attribute__((__always_inline__))
_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_pmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
+ return (__m128i) __builtin_ia32_pmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
}
static __inline __m128i __attribute__((__always_inline__))
static __inline __m128i __attribute__((__always_inline__))
_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_pmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+ return (__m128i) __builtin_ia32_pmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
}
static __inline __m128i __attribute__((__always_inline__))
_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_pmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+ return (__m128i) __builtin_ia32_pmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
}
static __inline __m128i __attribute__((__always_inline__))
_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_pmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+ return (__m128i) __builtin_ia32_pmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
}
static __inline __m128i __attribute__((__always_inline__))
/* Packed Integer Rotates and Shifts */
/* Rotates - Non-Immediate form */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_rot_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_protb ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_rot_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_protw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_rot_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_protd ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_rot_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_protq ((__v2di)__A, (__v2di)__B);
/* Rotates - Immediate form */
#ifdef __OPTIMIZE__
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_roti_epi8(__m128i __A, int __B)
{
return (__m128i) __builtin_ia32_protbi ((__v16qi)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_roti_epi16(__m128i __A, int __B)
{
return (__m128i) __builtin_ia32_protwi ((__v8hi)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_roti_epi32(__m128i __A, int __B)
{
return (__m128i) __builtin_ia32_protdi ((__v4si)__A, __B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_roti_epi64(__m128i __A, int __B)
{
return (__m128i) __builtin_ia32_protqi ((__v2di)__A, __B);
/* pshl */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_shl_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshlb ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_shl_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshlw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_shl_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshld ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_shl_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshlq ((__v2di)__A, (__v2di)__B);
}
/* psha */
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_sha_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshab ((__v16qi)__A, (__v16qi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_sha_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshaw ((__v8hi)__A, (__v8hi)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_sha_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshad ((__v4si)__A, (__v4si)__B);
}
-static __inline __m128i __attribute__((__always_inline__))
+static __inline __m128i __attribute__((__always_inline__))
_mm_sha_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pshaq ((__v2di)__A, (__v2di)__B);
return (__m128) __builtin_ia32_comuneqps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comnlt_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comunltps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
-_mm_comnle_ps(__m128 __A, __m128 __B)
+static __inline __m128 __attribute__((__always_inline__))
+_mm_comnle_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comunleps ((__v4sf)__A, (__v4sf)__B);
}
return (__m128) __builtin_ia32_comueqps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comnge_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comungeps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comngt_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comungtps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comfalse_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comfalseps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comoneq_ps(__m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_comneqps ((__v4sf)__A, (__v4sf)__B);
+ return (__m128) __builtin_ia32_comneqps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comge_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comgeps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comgt_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comgtps ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comtrue_ps(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comtrueps ((__v4sf)__A, (__v4sf)__B);
return (__m128d) __builtin_ia32_comuneqpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__))
_mm_comnlt_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comunltpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
-_mm_comnle_pd(__m128d __A, __m128d __B)
+static __inline __m128d __attribute__((__always_inline__))
+_mm_comnle_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comunlepd ((__v2df)__A, (__v2df)__B);
}
return (__m128d) __builtin_ia32_comueqpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__))
_mm_comnge_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comungepd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__))
_mm_comngt_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comungtpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__))
_mm_comfalse_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comfalsepd ((__v2df)__A, (__v2df)__B);
return (__m128d) __builtin_ia32_comneqpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__))
_mm_comge_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comgepd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__))
_mm_comgt_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comgtpd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__))
_mm_comtrue_pd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comtruepd ((__v2df)__A, (__v2df)__B);
return (__m128) __builtin_ia32_comuneqss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comnlt_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comunltss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
-_mm_comnle_ss(__m128 __A, __m128 __B)
+static __inline __m128 __attribute__((__always_inline__))
+_mm_comnle_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comunless ((__v4sf)__A, (__v4sf)__B);
}
return (__m128) __builtin_ia32_comueqss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comnge_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comungess ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comngt_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comungtss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comfalse_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comfalsess ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comoneq_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comneqss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comge_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comgess ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comgt_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comgtss ((__v4sf)__A, (__v4sf)__B);
}
-static __inline __m128 __attribute__((__always_inline__))
+static __inline __m128 __attribute__((__always_inline__))
_mm_comtrue_ss(__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_comtruess ((__v4sf)__A, (__v4sf)__B);
return (__m128d) __builtin_ia32_comuneqsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__))
_mm_comnlt_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comunltsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
-_mm_comnle_sd(__m128d __A, __m128d __B)
+static __inline __m128d __attribute__((__always_inline__))
+_mm_comnle_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comunlesd ((__v2df)__A, (__v2df)__B);
}
return (__m128d) __builtin_ia32_comueqsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__))
_mm_comnge_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comungesd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__))
_mm_comngt_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comungtsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__))
_mm_comfalse_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comfalsesd ((__v2df)__A, (__v2df)__B);
return (__m128d) __builtin_ia32_comneqsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__))
_mm_comge_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comgesd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__))
_mm_comgt_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comgtsd ((__v2df)__A, (__v2df)__B);
}
-static __inline __m128d __attribute__((__always_inline__))
+static __inline __m128d __attribute__((__always_inline__))
_mm_comtrue_sd(__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_comtruesd ((__v2df)__A, (__v2df)__B);
_mm_comlt_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltub ((__v16qi)__A, (__v16qi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comle_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomleub ((__v16qi)__A, (__v16qi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comgt_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtub ((__v16qi)__A, (__v16qi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comge_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgeub ((__v16qi)__A, (__v16qi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comeq_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomequb ((__v16qi)__A, (__v16qi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comneq_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomnequb ((__v16qi)__A, (__v16qi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comfalse_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalseub ((__v16qi)__A, (__v16qi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comtrue_epu8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtrueub ((__v16qi)__A, (__v16qi)__B);
-}
+}
/*pcom (integer, unsigned words) */
_mm_comlt_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltuw ((__v8hi)__A, (__v8hi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comle_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomleuw ((__v8hi)__A, (__v8hi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comgt_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtuw ((__v8hi)__A, (__v8hi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comge_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgeuw ((__v8hi)__A, (__v8hi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comeq_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomequw ((__v8hi)__A, (__v8hi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comneq_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomnequw ((__v8hi)__A, (__v8hi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comfalse_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalseuw ((__v8hi)__A, (__v8hi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comtrue_epu16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtrueuw ((__v8hi)__A, (__v8hi)__B);
-}
+}
/*pcom (integer, unsigned double words) */
_mm_comlt_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltud ((__v4si)__A, (__v4si)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comle_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomleud ((__v4si)__A, (__v4si)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comgt_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtud ((__v4si)__A, (__v4si)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comge_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgeud ((__v4si)__A, (__v4si)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comeq_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomequd ((__v4si)__A, (__v4si)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comneq_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomnequd ((__v4si)__A, (__v4si)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comfalse_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalseud ((__v4si)__A, (__v4si)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comtrue_epu32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtrueud ((__v4si)__A, (__v4si)__B);
-}
+}
/*pcom (integer, unsigned quad words) */
_mm_comlt_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltuq ((__v2di)__A, (__v2di)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comle_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomleuq ((__v2di)__A, (__v2di)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comgt_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtuq ((__v2di)__A, (__v2di)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comge_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgeuq ((__v2di)__A, (__v2di)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comeq_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomequq ((__v2di)__A, (__v2di)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comneq_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomnequq ((__v2di)__A, (__v2di)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comfalse_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalseuq ((__v2di)__A, (__v2di)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comtrue_epu64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtrueuq ((__v2di)__A, (__v2di)__B);
-}
+}
/*pcom (integer, signed bytes) */
_mm_comlt_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltb ((__v16qi)__A, (__v16qi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comle_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomleb ((__v16qi)__A, (__v16qi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comgt_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtb ((__v16qi)__A, (__v16qi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comge_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgeb ((__v16qi)__A, (__v16qi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comeq_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomeqb ((__v16qi)__A, (__v16qi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comneq_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomneqb ((__v16qi)__A, (__v16qi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comfalse_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalseb ((__v16qi)__A, (__v16qi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comtrue_epi8(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtrueb ((__v16qi)__A, (__v16qi)__B);
-}
+}
/*pcom (integer, signed words) */
_mm_comlt_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltw ((__v8hi)__A, (__v8hi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comle_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomlew ((__v8hi)__A, (__v8hi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comgt_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtw ((__v8hi)__A, (__v8hi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comge_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgew ((__v8hi)__A, (__v8hi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comeq_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomeqw ((__v8hi)__A, (__v8hi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comneq_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomneqw ((__v8hi)__A, (__v8hi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comfalse_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalsew ((__v8hi)__A, (__v8hi)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comtrue_epi16(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtruew ((__v8hi)__A, (__v8hi)__B);
-}
+}
/*pcom (integer, signed double words) */
_mm_comlt_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltd ((__v4si)__A, (__v4si)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comle_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomled ((__v4si)__A, (__v4si)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comgt_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtd ((__v4si)__A, (__v4si)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comge_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomged ((__v4si)__A, (__v4si)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comeq_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomeqd ((__v4si)__A, (__v4si)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comneq_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomneqd ((__v4si)__A, (__v4si)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comfalse_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalsed ((__v4si)__A, (__v4si)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comtrue_epi32(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtrued ((__v4si)__A, (__v4si)__B);
-}
+}
/*pcom (integer, signed quad words) */
_mm_comlt_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomltq ((__v2di)__A, (__v2di)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comle_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomleq ((__v2di)__A, (__v2di)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comgt_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgtq ((__v2di)__A, (__v2di)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comge_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomgeq ((__v2di)__A, (__v2di)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comeq_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomeqq ((__v2di)__A, (__v2di)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comneq_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomneqq ((__v2di)__A, (__v2di)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comfalse_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomfalseq ((__v2di)__A, (__v2di)__B);
-}
+}
static __inline __m128i __attribute__((__always_inline__))
_mm_comtrue_epi64(__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_pcomtrueq ((__v2di)__A, (__v2di)__B);
-}
+}
/* FRCZ */
static __inline __m128 __attribute__((__always_inline__))
; For SSE4A support
(UNSPEC_EXTRQI 130)
- (UNSPEC_EXTRQ 131)
+ (UNSPEC_EXTRQ 131)
(UNSPEC_INSERTQI 132)
(UNSPEC_INSERTQ 133)
(set_attr "mode" "SI")])
;; Pentium Pro can do steps 1 through 3 in one go.
-;; comi*, ucomi*, fcomi*, ficomi*,fucomi* (i387 instructions set condition codes)
+;; comi*, ucomi*, fcomi*, ficomi*,fucomi* (i387 instructions set condition codes)
(define_insn "*cmpfp_i_mixed"
[(set (reg:CCFP FLAGS_REG)
(compare:CCFP (match_operand 0 "register_operand" "f,x")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
- (set_attr "amdfam10_decode" "double")])
+ (set_attr "amdfam10_decode" "double")])
(define_expand "movhi"
[(set (match_operand:HI 0 "nonimmediate_operand" "")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
- (set_attr "amdfam10_decode" "double")])
+ (set_attr "amdfam10_decode" "double")])
;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10
(define_insn "*swaphi_2"
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
- (set_attr "amdfam10_decode" "vector")])
+ (set_attr "amdfam10_decode" "vector")])
;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10
(define_insn "*swapqi_2"
(set_attr "mode" "DI")
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
- (set_attr "amdfam10_decode" "double")])
+ (set_attr "amdfam10_decode" "double")])
(define_expand "movti"
[(set (match_operand:TI 0 "nonimmediate_operand" "")
[(set (match_operand:DF 0 "register_operand" "")
(float_extend:DF
(match_operand:SF 1 "nonimmediate_operand" "")))]
- "(TARGET_USE_VECTOR_CONVERTS || TARGET_GENERIC) && !optimize_size
+ "(TARGET_USE_VECTOR_CONVERTS || TARGET_GENERIC) && !optimize_size
&& reload_completed && SSE_REG_P (operands[0])"
[(set (match_dup 2)
(float_extend:V2DF
emit_insn (gen_sse_unpcklps (operands[3], operands[3], operands[3]));
}
else
- emit_insn (gen_vec_setv4sf_0 (operands[3],
+ emit_insn (gen_vec_setv4sf_0 (operands[3],
CONST0_RTX (V4SFmode), operands[1]));
})
[(set (match_operand:SF 0 "register_operand" "")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "")))]
- "(TARGET_USE_VECTOR_CONVERTS || TARGET_GENERIC) && !optimize_size
+ "(TARGET_USE_VECTOR_CONVERTS || TARGET_GENERIC) && !optimize_size
&& reload_completed && SSE_REG_P (operands[0])"
[(set (match_dup 2)
(vec_concat:V4SF
(set_attr "mode" "HI")
(set_attr "unit" "i387")
(set_attr "athlon_decode" "vector")
- (set_attr "amdfam10_decode" "vector")])
+ (set_attr "amdfam10_decode" "vector")])
\f
;; Conversion between fixed point and floating point.
(define_insn "*floatsisf2_mixed_vector"
[(set (match_operand:SF 0 "register_operand" "=x,f,?f")
(float:SF (match_operand:SI 1 "nonimmediate_operand" "x,m,r")))]
- "TARGET_MIX_SSE_I387 && !flag_trapping_math
+ "TARGET_MIX_SSE_I387 && !flag_trapping_math
&& TARGET_USE_VECTOR_CONVERTS && !optimize_size"
"@
cvtdq2ps\t{%1, %0|%0, %1}
"#"
[(set_attr "type" "multi")])
-(define_split
+(define_split
[(set (match_operand:SF 0 "register_operand" "")
(float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
"flag_trapping_math
emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), operands[1]));
})
-(define_split
+(define_split
[(set (match_operand:SF 0 "register_operand" "")
(float:SF (match_operand:SI 1 "register_operand" "")))]
"flag_trapping_math
(set_attr "amdfam10_decode" "double")
(set_attr "fp_int_src" "true")])
-(define_split
+(define_split
[(set (match_operand:DF 0 "register_operand" "")
(float:DF (match_operand:SI 1 "memory_operand" "")))]
"TARGET_USE_VECTOR_CONVERTS && reload_completed
"TARGET_64BIT"
"")
-;; On AMDFAM10
+;; On AMDFAM10
;; IMUL reg64, reg64, imm8 Direct
;; IMUL reg64, mem64, imm8 VectorPath
;; IMUL reg64, reg64, imm32 Direct
-;; IMUL reg64, mem64, imm32 VectorPath
+;; IMUL reg64, mem64, imm32 VectorPath
;; IMUL reg64, reg64 Direct
;; IMUL reg64, mem64 Direct
(cond [(and (eq_attr "alternative" "0,1")
(match_operand 1 "memory_operand" ""))
(const_string "vector")]
- (const_string "direct")))
+ (const_string "direct")))
(set_attr "mode" "DI")])
(define_expand "mulsi3"
""
"")
-;; On AMDFAM10
+;; On AMDFAM10
;; IMUL reg32, reg32, imm8 Direct
;; IMUL reg32, mem32, imm8 VectorPath
;; IMUL reg32, reg32, imm32 Direct
(cond [(and (eq_attr "alternative" "0,1")
(match_operand 1 "memory_operand" ""))
(const_string "vector")]
- (const_string "direct")))
+ (const_string "direct")))
(set_attr "mode" "SI")])
(define_insn "*mulsi3_1_zext"
(cond [(and (eq_attr "alternative" "0,1")
(match_operand 1 "memory_operand" ""))
(const_string "vector")]
- (const_string "direct")))
+ (const_string "direct")))
(set_attr "mode" "SI")])
(define_expand "mulhi3"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "direct")))
- (set_attr "amdfam10_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")
(set_attr "mode" "QI")])
(define_expand "umulqihi3"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "direct")))
- (set_attr "amdfam10_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")
(set_attr "mode" "QI")])
(define_expand "umulditi3"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
- (set_attr "amdfam10_decode" "double")
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "DI")])
;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
- (set_attr "amdfam10_decode" "double")
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_expand "mulditi3"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
- (set_attr "amdfam10_decode" "double")
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_expand "umuldi3_highpart"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
- (set_attr "amdfam10_decode" "double")
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "DI")])
(define_expand "umulsi3_highpart"
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
(set_attr "athlon_decode" "vector")
- (set_attr "amdfam10_decode" "vector")])
+ (set_attr "amdfam10_decode" "vector")])
(define_expand "x86_64_shift_adj"
[(set (reg:CCZ FLAGS_REG)
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
- (set_attr "amdfam10_decode" "vector")])
+ (set_attr "amdfam10_decode" "vector")])
(define_expand "x86_shift_adj_1"
[(set (reg:CCZ FLAGS_REG)
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
(set_attr "athlon_decode" "vector")
- (set_attr "amdfam10_decode" "vector")])
+ (set_attr "amdfam10_decode" "vector")])
(define_expand "ashrdi3"
[(set (match_operand:DI 0 "shiftdi_operand" "")
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")
- (set_attr "athlon_decode" "direct")
+ (set_attr "athlon_decode" "direct")
(set_attr "amdfam10_decode" "direct")])
(define_insn "*rsqrtsf2_sse"
;; SSE5 conditional move
(define_insn "*sse5_pcmov_<mode>"
[(set (match_operand:MODEF 0 "register_operand" "=x,x,x,x")
- (if_then_else:MODEF
+ (if_then_else:MODEF
(match_operand:MODEF 1 "nonimmediate_operand" "xm,x,0,0")
(match_operand:MODEF 2 "nonimmediate_operand" "0,0,x,xm")
(match_operand:MODEF 3 "vector_move_operand" "x,xm,xm,x")))]