1 /* { dg-do run { target i?86-*-* x86_64-*-* } } */
2 /* { dg-require-effective-target ssse3 } */
3 /* { dg-options "-O2 -mssse3" } */
#include <stdlib.h>
#include <string.h>
#include <tmmintrin.h>
#include "../../gcc.dg/i386-cpuid.h"
#include "ssse3-vals.h"
10 static void ssse3_test (void);
15 unsigned long cpu_facilities;
17 cpu_facilities = i386_cpuid_ecx ();
19 /* Run SSSE3 test only if host has SSSE3 support. */
20 if ((cpu_facilities & bit_SSSE3))
/* Test the 64-bit form.  PALIGNR requires its shift count to be a
   compile-time immediate, so a runtime IMM must be dispatched through
   a switch to a constant-argument intrinsic call.  Stores the aligned
   result of (t1:t2 >> imm bytes) into *R; counts past 16 yield zero.  */
static void
ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
{
  __m64 t1 = *(__m64 *) i1;
  __m64 t2 = *(__m64 *) i2;

  switch (imm)
    {
    case 0:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 0);
      break;
    case 1:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 1);
      break;
    case 2:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 2);
      break;
    case 3:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 3);
      break;
    case 4:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 4);
      break;
    case 5:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 5);
      break;
    case 6:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 6);
      break;
    case 7:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 7);
      break;
    case 8:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 8);
      break;
    case 9:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 9);
      break;
    case 10:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 10);
      break;
    case 11:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 11);
      break;
    case 12:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 12);
      break;
    case 13:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 13);
      break;
    case 14:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 14);
      break;
    case 15:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 15);
      break;
    case 16:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 16);
      break;
    default:
      /* A shift of more than 2*8 bytes produces all zeros.  */
      *(__m64 *) r = _mm_setzero_si64 ();
      break;
    }

  /* Leave MMX state clean for any following x87 code.  */
  _mm_empty ();
}
/* Test the 128-bit form.  As with the 64-bit variant, the immediate
   operand of PALIGNR forces a switch over IMM so each intrinsic call
   has a constant count.  Stores the aligned result of
   (t1:t2 >> imm bytes) into *R; counts past 32 yield zero.  */
static void
ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r)
{
  /* Assumes incoming pointers are 16-byte aligned */
  __m128i t1 = *(__m128i *) i1;
  __m128i t2 = *(__m128i *) i2;

  switch (imm)
    {
    case 0:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 0);
      break;
    case 1:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 1);
      break;
    case 2:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 2);
      break;
    case 3:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 3);
      break;
    case 4:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 4);
      break;
    case 5:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 5);
      break;
    case 6:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 6);
      break;
    case 7:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 7);
      break;
    case 8:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 8);
      break;
    case 9:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 9);
      break;
    case 10:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 10);
      break;
    case 11:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 11);
      break;
    case 12:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 12);
      break;
    case 13:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 13);
      break;
    case 14:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 14);
      break;
    case 15:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 15);
      break;
    case 16:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 16);
      break;
    case 17:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 17);
      break;
    case 18:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 18);
      break;
    case 19:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 19);
      break;
    case 20:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 20);
      break;
    case 21:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 21);
      break;
    case 22:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 22);
      break;
    case 23:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 23);
      break;
    case 24:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 24);
      break;
    case 25:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 25);
      break;
    case 26:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 26);
      break;
    case 27:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 27);
      break;
    case 28:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 28);
      break;
    case 29:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 29);
      break;
    case 30:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 30);
      break;
    case 31:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 31);
      break;
    case 32:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32);
      break;
    default:
      /* A shift of more than 2*16 bytes produces all zeros.  */
      *(__m128i *) r = _mm_setzero_si128 ();
      break;
    }
}
/* Routine to manually compute the results */
/* Reference model for the 128-bit PALIGNR: concatenate the 16 bytes of
   *I2 (low) and *I1 (high) into a 32-byte buffer, then copy bytes
   IMM .. IMM+15 into *R, substituting zero for any byte index that
   falls past the end of the buffer (i.e. when IMM or IMM+i >= 32).  */
static void
compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf[32];
  char *bout = (char *) r;
  int i;

  memcpy (&buf[0], i2, 16);
  memcpy (&buf[16], i1, 16);

  for (i = 0; i < 16; i++)
    if (imm >= 32 || imm + i >= 32)
      bout[i] = 0;
    else
      bout[i] = buf[imm + i];
}
/* Reference model for the 64-bit PALIGNR, applied twice: once to the
   low 8 bytes of each 16-byte operand and once to the high 8 bytes,
   matching how ssse3_test runs the MMX intrinsic on two 64-bit halves.
   Each half concatenates 8 bytes of *I2 (low) and *I1 (high) into a
   16-byte buffer and copies bytes IMM .. IMM+7 out, substituting zero
   for indices past the end (IMM or IMM+i >= 16).  */
static void
compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf[16];
  char *bout = (char *) r;
  int i;

  /* Handle the first half */
  memcpy (&buf[0], i2, 8);
  memcpy (&buf[8], i1, 8);

  for (i = 0; i < 8; i++)
    if (imm >= 16 || imm + i >= 16)
      bout[i] = 0;
    else
      bout[i] = buf[imm + i];

  /* Handle the second half */
  memcpy (&buf[0], &i2[2], 8);
  memcpy (&buf[8], &i1[2], 8);

  for (i = 0; i < 8; i++)
    if (imm >= 16 || imm + i >= 16)
      bout[i + 8] = 0;
    else
      bout[i + 8] = buf[imm + i];
}
253 int r [4] __attribute__ ((aligned(16)));
258 for (i = 0; i < 256; i += 8)
259 for (imm = 0; imm < 100; imm++)
261 /* Manually compute the result */
262 compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
264 /* Run the 64-bit tests */
265 ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
266 ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
267 fail += chk_128 (ck, r);
269 /* Recompute the results for 128-bits */
270 compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);
272 /* Run the 128-bit tests */
273 ssse3_test_palignr128 (&vals[i + 0], &vals[i + 4], imm, r);
274 fail += chk_128 (ck, r);