/* { dg-require-effective-target ssse3 } */
/* { dg-options "-O2 -fno-strict-aliasing -mssse3" } */

#define CHECK_H "ssse3-check.h"
#define TEST ssse3_test

#include CHECK_H

#include "ssse3-vals.h"

#include <tmmintrin.h>
/* Test the 64-bit form.
   Loads two 8-byte vectors from I1 and I2 and stores
   PALIGNR (t1, t2, IMM) through R.  Assumes I1, I2 and R are 8-byte
   aligned.  */
static void
ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
{
  __m64 t1 = *(__m64 *) i1;
  __m64 t2 = *(__m64 *) i2;

  /* The shift count of _mm_alignr_pi8 must be an immediate constant,
     so a runtime IMM has to be dispatched through a switch.  */
  switch (imm)
    {
    case 0:  *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 0);  break;
    case 1:  *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 1);  break;
    case 2:  *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 2);  break;
    case 3:  *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 3);  break;
    case 4:  *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 4);  break;
    case 5:  *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 5);  break;
    case 6:  *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 6);  break;
    case 7:  *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 7);  break;
    case 8:  *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 8);  break;
    case 9:  *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 9);  break;
    case 10: *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 10); break;
    case 11: *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 11); break;
    case 12: *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 12); break;
    case 13: *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 13); break;
    case 14: *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 14); break;
    case 15: *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 15); break;
    case 16: *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 16); break;
    default:
      /* Counts beyond the concatenated width produce all zeros.  */
      *(__m64 *) r = _mm_setzero_si64 ();
      break;
    }

  /* Leave MMX state clean for any following x87 code.  */
  _mm_empty ();
}
/* Test the 128-bit form.
   Loads two 16-byte vectors from I1 and I2 and stores
   PALIGNR (t1, t2, IMM) through R.  */
static void
ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r)
{
  /* Assumes incoming pointers are 16-byte aligned */
  __m128i t1 = *(__m128i *) i1;
  __m128i t2 = *(__m128i *) i2;

  /* The shift count of _mm_alignr_epi8 must be an immediate constant,
     so a runtime IMM has to be dispatched through a switch.  */
  switch (imm)
    {
    case 0:  *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 0);  break;
    case 1:  *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 1);  break;
    case 2:  *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 2);  break;
    case 3:  *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 3);  break;
    case 4:  *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 4);  break;
    case 5:  *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 5);  break;
    case 6:  *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 6);  break;
    case 7:  *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 7);  break;
    case 8:  *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 8);  break;
    case 9:  *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 9);  break;
    case 10: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 10); break;
    case 11: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 11); break;
    case 12: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 12); break;
    case 13: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 13); break;
    case 14: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 14); break;
    case 15: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 15); break;
    case 16: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 16); break;
    case 17: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 17); break;
    case 18: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 18); break;
    case 19: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 19); break;
    case 20: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 20); break;
    case 21: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 21); break;
    case 22: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 22); break;
    case 23: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 23); break;
    case 24: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 24); break;
    case 25: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 25); break;
    case 26: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 26); break;
    case 27: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 27); break;
    case 28: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 28); break;
    case 29: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 29); break;
    case 30: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 30); break;
    case 31: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 31); break;
    case 32: *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32); break;
    default:
      /* Counts beyond the concatenated width produce all zeros.  */
      *(__m128i *) r = _mm_setzero_si128 ();
      break;
    }
}
/* Routine to manually compute the results.
   Reference model for the 128-bit PALIGNR: concatenate the 16 bytes of
   I2 (low) and I1 (high), take 16 bytes starting at byte offset IMM,
   and store them through R.  Bytes shifted in from beyond the 32-byte
   concatenation are zero.  */
static void
compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf[32];
  char *bout = (char *) r;
  int i;

  /* buf = i2 ## i1, matching palignr's operand order (t2 is low).  */
  memcpy (&buf[0], i2, 16);
  memcpy (&buf[16], i1, 16);

  for (i = 0; i < 16; i++)
    if (imm >= 32 || imm + i >= 32)
      bout[i] = 0;	/* Shifted past the end: zero-fill.  */
    else
      bout[i] = buf[imm + i];
}
219 compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
222 char *bout = (char *)r;
225 /* Handle the first half */
226 memcpy (&buf[0], i2, 8);
227 memcpy (&buf[8], i1, 8);
229 for (i = 0; i < 8; i++)
230 if (imm >= 16 || imm + i >= 16)
233 bout[i] = buf[imm + i];
235 /* Handle the second half */
236 memcpy (&buf[0], &i2[2], 8);
237 memcpy (&buf[8], &i1[2], 8);
239 for (i = 0; i < 8; i++)
240 if (imm >= 16 || imm + i >= 16)
243 bout[i + 8] = buf[imm + i];
251 int r [4] __attribute__ ((aligned(16)));
256 for (i = 0; i < 256; i += 8)
257 for (imm = 0; imm < 100; imm++)
260 /* Manually compute the result */
261 compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
263 /* Run the 64-bit tests */
264 ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
265 ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
266 fail += chk_128 (ck, r);
269 /* Recompute the results for 128-bits */
270 compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);
272 /* Run the 128-bit tests */
273 ssse3_test_palignr128 (&vals[i + 0], &vals[i + 4], imm, r);
274 fail += chk_128 (ck, r);