-#define _mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __B))
-#define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __B))
-#define _mm_shuffle_epi32(__A, __B) ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B))
+#ifdef __OPTIMIZE__
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
+_mm_shufflehi_epi16 (__m128i __A, const int __mask)
+{
+ return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
+}
+
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
+_mm_shufflelo_epi16 (__m128i __A, const int __mask)
+{
+ return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
+}
+
+static __inline __m128i __attribute__((__always_inline__, __artificial__))
+_mm_shuffle_epi32 (__m128i __A, const int __mask)
+{
+ return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
+}
+#else
+#define _mm_shufflehi_epi16(__A, __B) \
+ ((__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __B))
+#define _mm_shufflelo_epi16(__A, __B) \
+ ((__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __B))
+#define _mm_shuffle_epi32(__A, __B) \
+ ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B))
+#endif