2 /* { dg-require-effective-target ssse3 } */
3 /* { dg-options "-O2 -fno-strict-aliasing -mssse3" } */
5 #include "ssse3-check.h"
6 #include "ssse3-vals.h"
11 /* Test the 64-bit form */
/* Loads one __m64 operand from each of i1 and i2 and stores the result of
   _mm_alignr_pi8 (t1, t2, N) through r, where N is a literal from 0 to 16.

   NOTE(review): the lines that choose between the stores below are not
   visible in this view.  Presumably a switch on imm selects exactly one of
   them (the intrinsic takes its byte offset as a constant argument), with
   the final N == 16 store acting as the fallback for larger imm values --
   confirm against the complete file.  The return type is likewise not
   visible here.  */
13 ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
/* The int buffers are reinterpreted as __m64 through pointer casts; this
   file is compiled with -fno-strict-aliasing (see its dg-options line).  */
15 __m64 t1 = *(__m64 *) i1;
16 __m64 t2 = *(__m64 *) i2;
/* One store per literal byte offset, 0 through 15.  */
21 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 0);
24 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 1);
27 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 2);
30 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 3);
33 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 4);
36 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 5);
39 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 6);
42 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 7);
45 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 8);
48 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 9);
51 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 10);
54 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 11);
57 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 12);
60 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 13);
63 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 14);
66 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 15);
/* Offset 16 is the combined byte width of the two 8-byte operands; the
   reference routine compute_correct_result_64 treats every imm >= 16 the
   same way, so this store presumably covers all of those -- confirm.  */
69 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 16);
76 /* Test the 128-bit form */
/* Loads one __m128i operand from each of i1 and i2 and stores the result of
   _mm_alignr_epi8 (t1, t2, N) through r, where N is a literal from 0 to 32.

   NOTE(review): the lines that choose between the stores below are not
   visible in this view.  Presumably a switch on imm selects exactly one of
   them (the intrinsic takes its byte offset as a constant argument), with
   the final N == 32 store acting as the fallback for larger imm values --
   confirm against the complete file.  The return type is likewise not
   visible here.  */
78 ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r)
80 /* Assumes incoming pointers are 16-byte aligned; the loads below and the
      stores through r all go through __m128i pointer casts.  */
81 __m128i t1 = *(__m128i *) i1;
82 __m128i t2 = *(__m128i *) i2;
/* One store per literal byte offset, 0 through 31.  */
87 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 0);
90 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 1);
93 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 2);
96 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 3);
99 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 4);
102 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 5);
105 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 6);
108 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 7);
111 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 8);
114 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 9);
117 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 10);
120 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 11);
123 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 12);
126 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 13);
129 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 14);
132 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 15);
135 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 16);
138 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 17);
141 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 18);
144 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 19);
147 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 20);
150 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 21);
153 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 22);
156 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 23);
159 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 24);
162 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 25);
165 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 26);
168 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 27);
171 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 28);
174 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 29);
177 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 30);
180 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 31);
/* Offset 32 is the combined byte width of the two 16-byte operands; the
   reference routine compute_correct_result_128 treats every imm >= 32 the
   same way, so this store presumably covers all of those -- confirm.  */
183 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32);
188 /* Routine to manually compute the results */
/* Reference computation for the 128-bit form.  Builds a 32-byte window in
   buf with the 16 bytes at i2 in buf[0..15] and the 16 bytes at i1 in
   buf[16..31], then produces the 16 output bytes through r, where output
   byte i corresponds to window position imm + i.

   NOTE(review): the declarations of buf and i, and the statement that the
   range test below controls, are not visible in this view.  Presumably
   positions that fall outside the window are written as zero and the
   indexed copy is the else branch -- confirm against the complete file.  */
190 compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
193 char *bout = (char *) r;	/* Byte-wise view of the result buffer.  */
/* i2 fills the low half of the window, i1 the high half.  */
196 memcpy (&buf[0], i2, 16);
197 memcpy (&buf[16], i1, 16);
199 for (i = 0; i < 16; i++)
/* True when position imm + i lies at or beyond the end of the 32-byte
   window.  */
200 if (imm >= 32 || imm + i >= 32)
203 bout[i] = buf[imm + i];
/* Reference computation for the 64-bit form, producing 16 output bytes
   through r as two independent 8-byte halves.  Each half builds a 16-byte
   window in buf (8 bytes from i2 at buf[0..7], 8 bytes from i1 at
   buf[8..15]) and maps output byte i to window position imm + i.  The first
   half reads from the start of i1 and i2; the second reads from element 2
   of each and writes bout[8..15].

   NOTE(review): the declarations of buf and i, and the statements that the
   range tests below control, are not visible in this view.  Presumably
   positions that fall outside the window are written as zero and each
   indexed copy is the else branch -- confirm against the complete file.  */
207 compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
210 char *bout = (char *)r;	/* Byte-wise view of the result buffer.  */
213 /* Handle the first half: output bytes 0..7.  */
214 memcpy (&buf[0], i2, 8);
215 memcpy (&buf[8], i1, 8);
217 for (i = 0; i < 8; i++)
/* True when position imm + i lies at or beyond the end of the 16-byte
   window.  */
218 if (imm >= 16 || imm + i >= 16)
221 bout[i] = buf[imm + i];
223 /* Handle the second half: output bytes 8..15, with the window refilled
       from element 2 onward of each input.  */
224 memcpy (&buf[0], &i2[2], 8);
225 memcpy (&buf[8], &i1[2], 8);
227 for (i = 0; i < 8; i++)
/* Same range test as for the first half.  */
228 if (imm >= 16 || imm + i >= 16)
231 bout[i + 8] = buf[imm + i];
/* Body of the test driver.  For each group of eight ints in vals and each
   imm from 0 to 99, it compares the intrinsic results against the manually
   computed reference, first for the 64-bit form and then for the 128-bit
   form.

   NOTE(review): the header of the enclosing function and the declarations
   of i, imm, ck, fail and vals are not visible in this view, and the body
   may continue past the last line shown.  */
238 int r [4] __attribute__ ((aligned(16)));	/* Result buffer; ssse3_test_palignr128 stores a whole __m128i through it.  */
/* Each iteration uses vals[i .. i+3] as the first operand and
   vals[i+4 .. i+7] as the second.  */
243 for (i = 0; i < 256; i += 8)
/* imm deliberately runs well past the largest literal offsets (16 and 32)
   that the test routines contain.  */
244 for (imm = 0; imm < 100; imm++)
246 /* Manually compute the result */
247 compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
249 /* Run the 64-bit tests: two calls, one per 8-byte half of r, fed the
       same element pairs that compute_correct_result_64 reads for its first
       and second halves.  */
250 ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
251 ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
/* Adds chk_128's return value to fail; presumably nonzero when ck and r
   differ -- confirm in ssse3-check.h.  */
252 fail += chk_128 (ck, r);
254 /* Recompute the results for 128-bits */
255 compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);
257 /* Run the 128-bit tests */
258 ssse3_test_palignr128 (&vals[i + 0], &vals[i + 4], imm, r);
259 fail += chk_128 (ck, r);